mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
Merge branch 'k2-fsa:master' into dev/k2ssl
This commit is contained in:
commit
6a30568a35
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@ -108,4 +108,4 @@ jobs:
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: egs/librispeech/ASR/zipformer/swoosh.pdf
|
||||
name: swoosh.pdf
|
||||
name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }}
|
||||
|
@ -87,7 +87,7 @@ fi
|
||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
log "Stage 3: Prepare musan manifest"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
# to data/musan
|
||||
# to $dl_dir/musan
|
||||
if [ ! -f data/manifests/.musan_manifests.done ]; then
|
||||
log "It may take 6 minutes"
|
||||
mkdir -p data/manifests
|
||||
|
@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then
|
||||
# for train, we use smaller context and larger batches to speed-up processing
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 5.0 \
|
||||
--use-garbage-class \
|
||||
@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then
|
||||
for part in eval test; do
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \
|
||||
$EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 15.0 \
|
||||
|
@ -65,7 +65,7 @@ fi
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Prepare musan manifest"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
# to data/musan
|
||||
# to $dl_dir/musan
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare musan $dl_dir/musan data/manifests
|
||||
fi
|
||||
|
@ -82,7 +82,7 @@ class AlimeetingAsrDataModule:
|
||||
group.add_argument(
|
||||
"--manifest-dir",
|
||||
type=Path,
|
||||
default=Path("data/manifests"),
|
||||
default=Path("data/fbank"),
|
||||
help="Path to directory with train/valid/test cuts.",
|
||||
)
|
||||
group.add_argument(
|
||||
@ -327,9 +327,11 @@ class AlimeetingAsrDataModule:
|
||||
def test_dataloaders(self, cuts: CutSet) -> DataLoader:
|
||||
logging.debug("About to create test dataset")
|
||||
test = K2SpeechRecognitionDataset(
|
||||
input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
|
||||
if self.args.on_the_fly_feats
|
||||
else PrecomputedFeatures(),
|
||||
input_strategy=(
|
||||
OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
|
||||
if self.args.on_the_fly_feats
|
||||
else PrecomputedFeatures()
|
||||
),
|
||||
return_cuts=True,
|
||||
)
|
||||
sampler = DynamicBucketingSampler(
|
||||
|
@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then
|
||||
# for train, we use smaller context and larger batches to speed-up processing
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 5.0 \
|
||||
--use-garbage-class \
|
||||
@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then
|
||||
for part in dev test; do
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \
|
||||
$EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 15.0 \
|
||||
|
Loading…
x
Reference in New Issue
Block a user