From 380d0fa27086853cec8ea76315b4f8ddc7311cac Mon Sep 17 00:00:00 2001
From: yfyeung
Date: Thu, 10 Jul 2025 03:15:54 +0000
Subject: [PATCH] fix

---
 .../ASR/local/compute_fbank_gigaspeech_splits.py | 7 ++++---
 egs/gigaspeech/ASR/prepare.sh                    | 6 +++---
 egs/gigaspeech/ASR/prepare_lm.sh                 | 2 +-
 egs/librispeech/ASR/prepare.sh                   | 4 ++--
 egs/librispeech/ASR/prepare_lm.sh                | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py b/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py
index 0ddcf3317..c1645f7cc 100755
--- a/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py
+++ b/egs/gigaspeech/ASR/local/compute_fbank_gigaspeech_splits.py
@@ -112,9 +112,10 @@ def compute_fbank_gigaspeech_splits(args):
         cut_set = CutSet.from_file(raw_cuts_path)
 
         logging.info("Computing features")
-        if (output_dir / f"gigaspeech_feats_XL_{idx}.lca").exists():
-            logging.info(f"Removing {output_dir}/gigaspeech_feats_XL_{idx}.lca")
-            os.remove(output_dir / f"gigaspeech_feats_XL_{idx}.lca")
+        filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
+        if filename.exists():
+            logging.info(f"Removing {filename}")
+            os.remove(str(filename))
 
         cut_set = cut_set.compute_and_store_features_batch(
             extractor=extractor,
diff --git a/egs/gigaspeech/ASR/prepare.sh b/egs/gigaspeech/ASR/prepare.sh
index f1c0be692..ef6a667f9 100755
--- a/egs/gigaspeech/ASR/prepare.sh
+++ b/egs/gigaspeech/ASR/prepare.sh
@@ -13,13 +13,13 @@ stop_stage=8
 
 # Compute fbank features for a subset of splits from `start` (inclusive) to `stop` (exclusive)
 start=0
-stop=-1
+stop=-1 # -1 means until the end
 
-# Note: This script just prepare the minimal requirements that needed by a
+# Note: This script just prepares the minimal requirements needed by a
 # transducer training with bpe units.
 #
 # If you want to use ngram, please continue running prepare_lm.sh after
-# you succeed running this script.
+# you succeed in running this script.
 #
 # This script also contains the steps to generate phone based units, but they
 # will not run automatically, you can generate the phone based units by
diff --git a/egs/gigaspeech/ASR/prepare_lm.sh b/egs/gigaspeech/ASR/prepare_lm.sh
index a6954a4de..3fcf899a3 100755
--- a/egs/gigaspeech/ASR/prepare_lm.sh
+++ b/egs/gigaspeech/ASR/prepare_lm.sh
@@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 set -eou pipefail
 
-# This script generate Ngram LM and related files that needed by decoding.
+# This script generates Ngram LM and related files needed by decoding.
 
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded
diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh
index 40dc3260d..cf3dc9adb 100755
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@@ -10,11 +10,11 @@ nj=15
 stage=0
 stop_stage=5
 
-# Note: This script just prepare the minimal requirements that needed by a
+# Note: This script just prepares the minimal requirements needed by a
 # transducer training with bpe units.
 #
 # If you want to use ngram or nnlm, please continue running prepare_lm.sh after
-# you succeed running this script.
+# you succeed in running this script.
 #
 # This script also contains the steps to generate phone based units, but they
 # will not run automatically, you can generate the phone based units by
diff --git a/egs/librispeech/ASR/prepare_lm.sh b/egs/librispeech/ASR/prepare_lm.sh
index 1792395d8..55a6e021c 100755
--- a/egs/librispeech/ASR/prepare_lm.sh
+++ b/egs/librispeech/ASR/prepare_lm.sh
@@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 set -eou pipefail
 
-# This script generate Ngram LM / NNLM and related files that needed by decoding.
+# This script generates Ngram LM / NNLM and related files needed by decoding.
 
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded
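The Python hunk above builds the .lca manifest path once, reuses the resulting
variable for the existence check, the log message, and the removal, and passes
str(filename) to os.remove. Below is a minimal standalone sketch of that
cleanup idiom; the output directory and split index are illustrative
assumptions, not values taken from the patch.

import logging
import os
from pathlib import Path

logging.basicConfig(level=logging.INFO)

output_dir = Path("data/fbank/XL_split")  # assumed feature output directory
idx = "00000000"                          # assumed zero-padded split index

# Build the manifest path once and reuse it everywhere, mirroring the change
# in compute_fbank_gigaspeech_splits.py.
filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
if filename.exists():
    # Remove a stale manifest so feature extraction can restart cleanly.
    logging.info(f"Removing {filename}")
    # str() is not strictly required on Python 3.6+, where os.remove accepts
    # path-like objects, but it keeps the call explicit.
    os.remove(str(filename))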