This commit is contained in:
yfyeung 2025-07-10 03:15:54 +00:00
parent dda9b40ba3
commit 380d0fa270
5 changed files with 11 additions and 10 deletions

View File

@ -112,9 +112,10 @@ def compute_fbank_gigaspeech_splits(args):
cut_set = CutSet.from_file(raw_cuts_path)
logging.info("Computing features")
if (output_dir / f"gigaspeech_feats_XL_{idx}.lca").exists():
logging.info(f"Removing {output_dir}/gigaspeech_feats_XL_{idx}.lca")
os.remove(output_dir / f"gigaspeech_feats_XL_{idx}.lca")
filename = output_dir / f"gigaspeech_feats_XL_{idx}.lca"
if filename.exists():
logging.info(f"Removing {filename}")
os.remove(str(filename))
cut_set = cut_set.compute_and_store_features_batch(
extractor=extractor,

View File

@ -13,13 +13,13 @@ stop_stage=8
# Compute fbank features for a subset of splits from `start` (inclusive) to `stop` (exclusive)
start=0
stop=-1
stop=-1 # -1 means until the end
# Note: This script just prepare the minimal requirements that needed by a
# Note: This script just prepares the minimal requirements needed by a
# transducer training with bpe units.
#
# If you want to use ngram, please continue running prepare_lm.sh after
# you succeed running this script.
# you succeed in running this script.
#
# This script also contains the steps to generate phone-based units, but they
# will not run automatically; you can generate the phone-based units by

View File

@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
# This script generate Ngram LM and related files that needed by decoding.
# This script generates Ngram LM and related files needed by decoding.
# We assume dl_dir (download dir) contains the following
# directories and files. If not, they will be downloaded

View File

@ -10,11 +10,11 @@ nj=15
stage=0
stop_stage=5
# Note: This script just prepare the minimal requirements that needed by a
# Note: This script just prepares the minimal requirements needed by a
# transducer training with bpe units.
#
# If you want to use ngram or nnlm, please continue running prepare_lm.sh after
# you succeed running this script.
# you succeed in running this script.
#
# This script also contains the steps to generate phone-based units, but they
# will not run automatically; you can generate the phone-based units by

View File

@ -5,7 +5,7 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
set -eou pipefail
# This script generate Ngram LM / NNLM and related files that needed by decoding.
# This script generates Ngram LM / NNLM and related files needed by decoding.
# We assume dl_dir (download dir) contains the following
# directories and files. If not, they will be downloaded