From 380d0fa27086853cec8ea76315b4f8ddc7311cac Mon Sep 17 00:00:00 2001
From: yfyeung
Date: Thu, 10 Jul 2025 03:15:54 +0000
Subject: [PATCH] fix

---
 .../ASR/local/compute_fbank_gigaspeech_splits.py | 7 ++++---
 egs/gigaspeech/ASR/prepare.sh                    | 6 +++---
 egs/gigaspeech/ASR/prepare_lm.sh                 | 2 +-
 egs/librispeech/ASR/prepare.sh                   | 4 ++--
 egs/librispeech/ASR/prepare_lm.sh                | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py b/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py
index 0ddcf3317..c1645f7cc 100755
--- a/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py
+++ b/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py
@@ -112,9 +112,10 @@ def compute_fbank_gigaspeech_splits(args):
         cut_set = CutSet.from_file(raw_cuts_path)
 
         logging.info("Computing features")
-        if (output_dir / f"gigaspeech_feats_XL_{idx}.lca").exists():
-            logging.info(f"Removing {output_dir}/gigaspeech_feats_XL_{idx}.lca")
-            os.remove(output_dir / f"gigaspeech_feats_XL_{idx}.lca")
+        filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
+        if filename.exists():
+            logging.info(f"Removing {filename}")
+            os.remove(str(filename))
 
         cut_set = cut_set.compute_and_store_features_batch(
             extractor=extractor,
diff --git a/egs/gigaspeech/ASR/prepare.sh b/egs/gigaspeech/ASR/prepare.sh
index f1c0be692..ef6a667f9 100755
--- a/egs/gigaspeech/ASR/prepare.sh
+++ b/egs/gigaspeech/ASR/prepare.sh
@@ -13,13 +13,13 @@ stop_stage=8
 
 # Compute fbank features for a subset of splits from `start` (inclusive) to `stop` (exclusive)
 start=0
-stop=-1
+stop=-1 # -1 means until the end
 
-# Note: This script just prepare the minimal requirements that needed by a
+# Note: This script just prepares the minimal requirements needed by a
 # transducer training with bpe units.
 #
 # If you want to use ngram, please continue running prepare_lm.sh after
-# you succeed running this script.
+# you succeed in running this script.
 #
 # This script also contains the steps to generate phone based units, but they
 # will not run automatically, you can generate the phone based units by
diff --git a/egs/gigaspeech/ASR/prepare_lm.sh b/egs/gigaspeech/ASR/prepare_lm.sh
index a6954a4de..3fcf899a3 100755
--- a/egs/gigaspeech/ASR/prepare_lm.sh
+++ b/egs/gigaspeech/ASR/prepare_lm.sh
@@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 set -eou pipefail
 
-# This script generate Ngram LM and related files that needed by decoding.
+# This script generates Ngram LM and related files needed by decoding.
 
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded
diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh
index 40dc3260d..cf3dc9adb 100755
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@@ -10,11 +10,11 @@ nj=15
 stage=0
 stop_stage=5
 
-# Note: This script just prepare the minimal requirements that needed by a
+# Note: This script just prepares the minimal requirements needed by a
 # transducer training with bpe units.
 #
 # If you want to use ngram or nnlm, please continue running prepare_lm.sh after
-# you succeed running this script.
+# you succeed in running this script.
 #
 # This script also contains the steps to generate phone based units, but they
 # will not run automatically, you can generate the phone based units by
diff --git a/egs/librispeech/ASR/prepare_lm.sh b/egs/librispeech/ASR/prepare_lm.sh
index 1792395d8..55a6e021c 100755
--- a/egs/librispeech/ASR/prepare_lm.sh
+++ b/egs/librispeech/ASR/prepare_lm.sh
@@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 set -eou pipefail
 
-# This script generate Ngram LM / NNLM and related files that needed by decoding.
+# This script generates Ngram LM / NNLM and related files needed by decoding.
 
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded
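The Python hunk above builds the .lca manifest path once, reuses the resulting
variable for the existence check, the log message, and the removal, and passes
str(filename) to os.remove. Below is a minimal standalone sketch of that
cleanup idiom; the output directory and split index are illustrative
assumptions, not values taken from the patch.

import logging
import os
from pathlib import Path

logging.basicConfig(level=logging.INFO)

output_dir = Path("data/fbank/XL_split")  # assumed feature output directory
idx = "00000000"                          # assumed zero-padded split index

# Build the manifest path once and reuse it everywhere, mirroring the change
# in compute_fbank_gigaspeech_splits.py.
filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
if filename.exists():
    # Remove a stale manifest so feature extraction can restart cleanly.
    logging.info(f"Removing {filename}")
    # str() is not strictly required on Python 3.6+, where os.remove accepts
    # path-like objects, but it keeps the call explicit.
    os.remove(str(filename))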