From fd77c5758c7972784fa6cd49fe98bf0bd336e392 Mon Sep 17 00:00:00 2001 From: Yuekai Zhang Date: Tue, 23 Jan 2024 17:23:11 +0800 Subject: [PATCH] change compute feature batch --- .../ASR/local/compute_fbank_wenetspeech_splits.py | 15 +++++++++++---- egs/wenetspeech/ASR/prepare.sh | 2 +- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py index 137dd68de..29b69db27 100755 --- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py +++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py @@ -157,15 +157,22 @@ def compute_fbank_wenetspeech_splits(args): ) logging.info("Computing features") - cut_set = cut_set.compute_and_store_features_batch( + # cut_set = cut_set.compute_and_store_features_batch( + # extractor=extractor, + # storage_path=f"{output_dir}/feats_{subset}_{idx}", + # num_workers=args.num_workers, + # batch_duration=args.batch_duration, + # storage_type=LilcomChunkyWriter, + # overwrite=True, + # ) + cut_set = cut_set.compute_and_store_features( extractor=extractor, storage_path=f"{output_dir}/feats_{subset}_{idx}", - num_workers=args.num_workers, - batch_duration=args.batch_duration, + num_jobs=args.num_workers, + executor=extractor, storage_type=LilcomChunkyWriter, overwrite=True, ) - logging.info(f"Saving to {cuts_path}") cut_set.to_file(cuts_path) diff --git a/egs/wenetspeech/ASR/prepare.sh b/egs/wenetspeech/ASR/prepare.sh index 7d022e7a8..9002bde3f 100755 --- a/egs/wenetspeech/ASR/prepare.sh +++ b/egs/wenetspeech/ASR/prepare.sh @@ -215,7 +215,7 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then python3 ./local/compute_fbank_wenetspeech_splits.py \ --training-subset L \ - --num-workers 8 \ + --num-workers 80 \ --batch-duration 1600 \ --start 98 \ --num-mel-bins ${whisper_mel_bins} --whisper-fbank false \