diff --git a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py index c4bc83f46..c68538e1f 100755 --- a/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py +++ b/egs/wenetspeech/ASR/local/compute_fbank_wenetspeech_splits.py @@ -106,6 +106,13 @@ def get_parser(): default=False, help="Use WhisperFbank instead of Fbank. Default: False.", ) + + parser.add_argument( + "--output-dir-prefix", + type=str, + default="", + help="Prefix of the output directory.", + ) return parser @@ -115,6 +122,7 @@ def compute_fbank_wenetspeech_splits(args): num_splits = args.num_splits output_dir = f"data/fbank/{subset}_split_{num_splits}" output_dir = Path(output_dir) + output_dir = Path(args.output_dir_prefix) / output_dir assert output_dir.exists(), f"{output_dir} does not exist!" num_digits = len(str(num_splits)) @@ -130,10 +138,10 @@ def compute_fbank_wenetspeech_splits(args): if torch.cuda.is_available(): device = torch.device("cuda", 0) if args.whisper_fbank: - # extractor = WhisperFbank( - # WhisperFbankConfig(num_filters=args.num_mel_bins, device=device) - # ) - extractor = KaldifeatWhisperFbank(KaldifeatWhisperFbankConfig(num_filters=args.num_mel_bins, device=device)) + extractor = WhisperFbank( + WhisperFbankConfig(num_filters=args.num_mel_bins, device=device) + ) + # extractor = KaldifeatWhisperFbank(KaldifeatWhisperFbankConfig(num_filters=args.num_mel_bins, device=device)) else: extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device)) logging.info(f"device: {device}") diff --git a/egs/wenetspeech/ASR/prepare.sh b/egs/wenetspeech/ASR/prepare.sh index 9002bde3f..118d7c19f 100755 --- a/egs/wenetspeech/ASR/prepare.sh +++ b/egs/wenetspeech/ASR/prepare.sh @@ -215,10 +215,11 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then python3 ./local/compute_fbank_wenetspeech_splits.py \ --training-subset L \ - --num-workers 80 \ + --num-workers 20 \ --batch-duration 1600 \ --start 98 \ --num-mel-bins ${whisper_mel_bins} --whisper-fbank false \ + --output-dir-prefix /fbank \ --num-splits $num_splits fi