Fix I/O issue

This commit is contained in:
Yuekai Zhang 2024-01-23 21:54:32 +08:00
parent af29455c3d
commit df54121c41
2 changed files with 14 additions and 5 deletions

View File

@ -106,6 +106,13 @@ def get_parser():
default=False,
help="Use WhisperFbank instead of Fbank. Default: False.",
)
parser.add_argument(
"--output-dir-prefix",
type=str,
default="",
help="Prefix of the output directory.",
)
return parser
@ -115,6 +122,7 @@ def compute_fbank_wenetspeech_splits(args):
num_splits = args.num_splits
output_dir = f"data/fbank/{subset}_split_{num_splits}"
output_dir = Path(output_dir)
output_dir = Path(args.output_dir_prefix) / output_dir
assert output_dir.exists(), f"{output_dir} does not exist!"
num_digits = len(str(num_splits))
@ -130,10 +138,10 @@ def compute_fbank_wenetspeech_splits(args):
if torch.cuda.is_available():
device = torch.device("cuda", 0)
if args.whisper_fbank:
# extractor = WhisperFbank(
# WhisperFbankConfig(num_filters=args.num_mel_bins, device=device)
# )
extractor = KaldifeatWhisperFbank(KaldifeatWhisperFbankConfig(num_filters=args.num_mel_bins, device=device))
extractor = WhisperFbank(
WhisperFbankConfig(num_filters=args.num_mel_bins, device=device)
)
# extractor = KaldifeatWhisperFbank(KaldifeatWhisperFbankConfig(num_filters=args.num_mel_bins, device=device))
else:
extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device))
logging.info(f"device: {device}")

View File

@ -215,10 +215,11 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \
--num-workers 80 \
--num-workers 20 \
--batch-duration 1600 \
--start 98 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank false \
--output-dir-prefix /fbank \
--num-splits $num_splits
fi