yfyeung 2025-07-10 03:15:54 +00:00
parent dda9b40ba3
commit 380d0fa270
5 changed files with 11 additions and 10 deletions

View File

@@ -112,9 +112,10 @@ def compute_fbank_gigaspeech_splits(args):
         cut_set = CutSet.from_file(raw_cuts_path)
 
         logging.info("Computing features")
-        if (output_dir / f"gigaspeech_feats_XL_{idx}.lca").exists():
-            logging.info(f"Removing {output_dir}/gigaspeech_feats_XL_{idx}.lca")
-            os.remove(output_dir / f"gigaspeech_feats_XL_{idx}.lca")
+        filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
+        if filename.exists():
+            logging.info(f"Removing {filename}")
+            os.remove(str(filename))
         cut_set = cut_set.compute_and_store_features_batch(
             extractor=extractor,
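As a side note on the refactor above: the repeated path expression is now built once and reused. Below is a standalone sketch of the same cleanup pattern; the directory and split index are placeholders, not values from the recipe, and the last line only points out that on Python 3.8+ the exists()/remove() pair could equivalently be written as Path.unlink(missing_ok=True).

import logging
import os
from pathlib import Path

output_dir = Path("data/fbank")  # placeholder directory, not the recipe's actual path
idx = "0000"                     # placeholder split index

# Delete a stale feature archive before recomputing it, as in the diff above.
filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
if filename.exists():
    logging.info(f"Removing {filename}")
    os.remove(str(filename))

# Equivalent alternative on Python >= 3.8: remove the file if it exists,
# do nothing otherwise.
filename.unlink(missing_ok=True)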

View File

@@ -13,13 +13,13 @@ stop_stage=8
 # Compute fbank features for a subset of splits from `start` (inclusive) to `stop` (exclusive)
 start=0
-stop=-1
+stop=-1 # -1 means until the end
 
-# Note: This script just prepare the minimal requirements that needed by a
+# Note: This script just prepares the minimal requirements needed by a
 # transducer training with bpe units.
 #
 # If you want to use ngram, please continue running prepare_lm.sh after
-# you succeed running this script.
+# you succeed in running this script.
 #
 # This script also contains the steps to generate phone based units, but they
 # will not run automatically, you can generate the phone based units by
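For readers following the comment above: in icefall-style recipes these variables are normally overridden from the command line, assuming this prepare.sh sources the Kaldi-style shared/parse_options.sh helper as its sibling scripts do. The invocation below is a hypothetical example, not taken from the commit.

# Hypothetical usage, assuming shared/parse_options.sh is sourced so that
# stage/stop_stage/start/stop can be set from the command line:
./prepare.sh --stage 0 --stop-stage 8 --start 0 --stop 8

# If an n-gram LM is needed for decoding, continue with:
./prepare_lm.sh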

View File

@@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 set -eou pipefail
 
-# This script generate Ngram LM and related files that needed by decoding.
+# This script generates Ngram LM and related files needed by decoding.
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded

View File

@@ -10,11 +10,11 @@ nj=15
 stage=0
 stop_stage=5
 
-# Note: This script just prepare the minimal requirements that needed by a
+# Note: This script just prepares the minimal requirements needed by a
 # transducer training with bpe units.
 #
 # If you want to use ngram or nnlm, please continue running prepare_lm.sh after
-# you succeed running this script.
+# you succeed in running this script.
 #
 # This script also contains the steps to generate phone based units, but they
 # will not run automatically, you can generate the phone based units by

View File

@@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 set -eou pipefail
 
-# This script generate Ngram LM / NNLM and related files that needed by decoding.
+# This script generates Ngram LM / NNLM and related files needed by decoding.
 # We assume dl_dir (download dir) contains the following
 # directories and files. If not, they will be downloaded