Fix I/O issue: add --output-dir-prefix option and reduce --num-workers from 80 to 20

This commit is contained in:
Yuekai Zhang 2024-01-23 21:54:32 +08:00
parent af29455c3d
commit df54121c41
2 changed files with 14 additions and 5 deletions

View File

@ -106,6 +106,13 @@ def get_parser():
default=False, default=False,
help="Use WhisperFbank instead of Fbank. Default: False.", help="Use WhisperFbank instead of Fbank. Default: False.",
) )
parser.add_argument(
"--output-dir-prefix",
type=str,
default="",
help="Prefix of the output directory.",
)
return parser return parser
@ -115,6 +122,7 @@ def compute_fbank_wenetspeech_splits(args):
num_splits = args.num_splits num_splits = args.num_splits
output_dir = f"data/fbank/{subset}_split_{num_splits}" output_dir = f"data/fbank/{subset}_split_{num_splits}"
output_dir = Path(output_dir) output_dir = Path(output_dir)
output_dir = Path(args.output_dir_prefix) / output_dir
assert output_dir.exists(), f"{output_dir} does not exist!" assert output_dir.exists(), f"{output_dir} does not exist!"
num_digits = len(str(num_splits)) num_digits = len(str(num_splits))
@ -130,10 +138,10 @@ def compute_fbank_wenetspeech_splits(args):
if torch.cuda.is_available(): if torch.cuda.is_available():
device = torch.device("cuda", 0) device = torch.device("cuda", 0)
if args.whisper_fbank: if args.whisper_fbank:
# extractor = WhisperFbank( extractor = WhisperFbank(
# WhisperFbankConfig(num_filters=args.num_mel_bins, device=device) WhisperFbankConfig(num_filters=args.num_mel_bins, device=device)
# ) )
extractor = KaldifeatWhisperFbank(KaldifeatWhisperFbankConfig(num_filters=args.num_mel_bins, device=device)) # extractor = KaldifeatWhisperFbank(KaldifeatWhisperFbankConfig(num_filters=args.num_mel_bins, device=device))
else: else:
extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device)) extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device))
logging.info(f"device: {device}") logging.info(f"device: {device}")

View File

@ -215,10 +215,11 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \ python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \ --training-subset L \
--num-workers 80 \ --num-workers 20 \
--batch-duration 1600 \ --batch-duration 1600 \
--start 98 \ --start 98 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank false \ --num-mel-bins ${whisper_mel_bins} --whisper-fbank false \
--output-dir-prefix /fbank \
--num-splits $num_splits --num-splits $num_splits
fi fi