From 559c8a716039bc1f3da2a4d1487292830fd21f06 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 8 Sep 2024 17:10:17 +0800 Subject: [PATCH 1/4] fixed a typo in `prepare.sh` for alimeeting recipes (#1747) --- egs/alimeeting/ASR/prepare.sh | 2 +- egs/alimeeting/ASR_v2/prepare.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/alimeeting/ASR/prepare.sh b/egs/alimeeting/ASR/prepare.sh index 996a1da2d..55f9f019b 100755 --- a/egs/alimeeting/ASR/prepare.sh +++ b/egs/alimeeting/ASR/prepare.sh @@ -87,7 +87,7 @@ fi if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Prepare musan manifest" # We assume that you have downloaded the musan corpus - # to data/musan + # to $dl_dir/musan if [ ! -f data/manifests/.musan_manifests.done ]; then log "It may take 6 minutes" mkdir -p data/manifests diff --git a/egs/alimeeting/ASR_v2/prepare.sh b/egs/alimeeting/ASR_v2/prepare.sh index 15c20692d..1881cd75c 100755 --- a/egs/alimeeting/ASR_v2/prepare.sh +++ b/egs/alimeeting/ASR_v2/prepare.sh @@ -65,7 +65,7 @@ fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then log "Stage 2: Prepare musan manifest" # We assume that you have downloaded the musan corpus - # to data/musan + # to $dl_dir/musan mkdir -p data/manifests lhotse prepare musan $dl_dir/musan data/manifests fi From 2ff0bb6a884c8f5aafa48551fba8c7d0eeb15b96 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 8 Sep 2024 17:42:55 +0800 Subject: [PATCH 2/4] fix CI tests (#1748) --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9eb7e403c..c22f2edb5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -108,4 +108,4 @@ jobs: - uses: actions/upload-artifact@v4 with: path: egs/librispeech/ASR/zipformer/swoosh.pdf - name: swoosh.pdf + name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }} From 65b8a6c730568ed12fccccb244e013f6ae3d7745 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 8 Sep 2024 20:34:49 +0800 Subject: [PATCH 3/4] fixed wrong default value for the `alimeeting` recipe (#1750) --- .../pruned_transducer_stateless7/asr_datamodule.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py b/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py index 6b56c8a6a..9da820315 100644 --- a/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py +++ b/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py @@ -82,7 +82,7 @@ class AlimeetingAsrDataModule: group.add_argument( "--manifest-dir", type=Path, - default=Path("data/manifests"), + default=Path("data/fbank"), help="Path to directory with train/valid/test cuts.", ) group.add_argument( @@ -327,9 +327,11 @@ class AlimeetingAsrDataModule: def test_dataloaders(self, cuts: CutSet) -> DataLoader: logging.debug("About to create test dataset") test = K2SpeechRecognitionDataset( - input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) - if self.args.on_the_fly_feats - else PrecomputedFeatures(), + input_strategy=( + OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + if self.args.on_the_fly_feats + else PrecomputedFeatures() + ), return_cuts=True, ) sampler = DynamicBucketingSampler( From a394bf74742c0242f35a514e016df74d6ba42505 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 8 Sep 2024 20:35:07 +0800 Subject: [PATCH 4/4] fixed gss scripts for `alimeeting` and `ami` recipes (#1749) --- egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh | 4 ++-- egs/ami/ASR/local/prepare_ami_gss.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh b/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh index 76db19832..bd25bc9e5 100755 --- a/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh +++ b/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh @@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then # for train, we use smaller context and larger batches to speed-up processing for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \ - $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \ + $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 5.0 \ --use-garbage-class \ @@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then for part in eval test; do for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \ - $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \ + $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \ $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 15.0 \ diff --git a/egs/ami/ASR/local/prepare_ami_gss.sh b/egs/ami/ASR/local/prepare_ami_gss.sh index d5422458b..414c22b12 100755 --- a/egs/ami/ASR/local/prepare_ami_gss.sh +++ b/egs/ami/ASR/local/prepare_ami_gss.sh @@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then # for train, we use smaller context and larger batches to speed-up processing for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \ - $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \ + $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 5.0 \ --use-garbage-class \ @@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then for part in dev test; do for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \ - $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \ + $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \ $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 15.0 \