From 547f5c5cfb6dad2d06fd12c079c2df22fa92e9a6 Mon Sep 17 00:00:00 2001 From: Kinan Martin Date: Fri, 2 May 2025 07:22:38 +0900 Subject: [PATCH] optimize with num_jobs on save_audios --- egs/mls_english/ASR/local/compute_fbank_mls_english.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/egs/mls_english/ASR/local/compute_fbank_mls_english.py b/egs/mls_english/ASR/local/compute_fbank_mls_english.py index 8c5cae842..e9bd81551 100644 --- a/egs/mls_english/ASR/local/compute_fbank_mls_english.py +++ b/egs/mls_english/ASR/local/compute_fbank_mls_english.py @@ -120,6 +120,11 @@ def main(): cut_sets = make_cutset_blueprints(mls_eng_hf_dataset_path) for part, cut_set in cut_sets: logging.info(f"Processing {part}") + cut_set = cut_set.save_audios( + num_jobs=num_jobs, + storage_path=(args.audio_dir / part).as_posix(), + ) # makes new cutset that loads audio from paths to actual audio files + cut_set = cut_set.compute_and_store_features( extractor=extractor, num_jobs=num_jobs, @@ -127,7 +132,6 @@ def main(): storage_type=LilcomChunkyWriter, ) - cut_set = cut_set.save_audios(args.audio_dir / part) # makes new cutset that uses paths to actual audio files cut_set.to_file(args.manifest_dir / f"mls_eng_cuts_{part}.jsonl.gz") logging.info("All fbank computed for MLS English.")