mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
Restore mls_english's compute_fbank_mls_english.py and prepare.sh to their versions from commit 547f5c5
This commit is contained in:
parent
e34f2dbb2a
commit
daff070d68
@ -120,6 +120,11 @@ def main():
|
||||
cut_sets = make_cutset_blueprints(mls_eng_hf_dataset_path)
|
||||
for part, cut_set in cut_sets:
|
||||
logging.info(f"Processing {part}")
|
||||
cut_set = cut_set.save_audios(
|
||||
num_jobs=num_jobs,
|
||||
storage_path=(args.audio_dir / part).as_posix(),
|
||||
) # makes new cutset that loads audio from paths to actual audio files
|
||||
|
||||
cut_set = cut_set.compute_and_store_features(
|
||||
extractor=extractor,
|
||||
num_jobs=num_jobs,
|
||||
@ -127,7 +132,6 @@ def main():
|
||||
storage_type=LilcomChunkyWriter,
|
||||
)
|
||||
|
||||
cut_set = cut_set.save_audios(args.audio_dir / part) # makes new cutset that uses paths to actual audio files
|
||||
cut_set.to_file(args.manifest_dir / f"mls_eng_cuts_{part}.jsonl.gz")
|
||||
|
||||
logging.info("All fbank computed for MLS English.")
|
||||
|
@ -60,9 +60,9 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
--audio-dir data/audio \
|
||||
--dl-dir $dl_dir/mls_english
|
||||
# --dl-dir /root/datasets/parler-tts--mls_eng
|
||||
python local/validate_manifest.py --manifest data/manifests/mls_english_cuts_train.jsonl.gz
|
||||
python local/validate_manifest.py --manifest data/manifests/mls_english_cuts_dev.jsonl.gz
|
||||
python local/validate_manifest.py --manifest data/manifests/mls_english_cuts_test.jsonl.gz
|
||||
python local/validate_manifest.py --manifest data/manifests/mls_eng_cuts_train.jsonl.gz
|
||||
python local/validate_manifest.py --manifest data/manifests/mls_eng_cuts_dev.jsonl.gz
|
||||
python local/validate_manifest.py --manifest data/manifests/mls_eng_cuts_test.jsonl.gz
|
||||
touch data/manifests/.mls_english-validated.done
|
||||
fi
|
||||
fi
|
||||
@ -71,7 +71,10 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Prepare transcript for BPE training"
|
||||
if [ ! -f data/lang/transcript.txt ]; then
|
||||
log "Generating transcripts for BPE training"
|
||||
./local/utils/generate_transcript.py --lang-dir data/lang
|
||||
python local/utils/generate_transcript.py \
|
||||
--dataset-path $dl_dir/mls_english \
|
||||
--lang-dir data/lang \
|
||||
--split train
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -83,7 +86,7 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
mkdir -p $bpe_dir
|
||||
|
||||
if [ ! -f $bpe_dir/bpe.model ]; then
|
||||
./local/train_bpe_model.py \
|
||||
python local/train_bpe_model.py \
|
||||
--lang-dir $bpe_dir \
|
||||
--vocab-size $vocab_size \
|
||||
--transcript data/lang/transcript.txt
|
||||
|
Loading…
x
Reference in New Issue
Block a user