Switch fbank computation from compute_and_store_features_batch to compute_and_store_features

This commit is contained in:
Yuekai Zhang 2024-01-23 17:23:11 +08:00
parent f4cf9fb2d3
commit fd77c5758c
2 changed files with 12 additions and 5 deletions

View File

@ -157,15 +157,22 @@ def compute_fbank_wenetspeech_splits(args):
)
logging.info("Computing features")
cut_set = cut_set.compute_and_store_features_batch(
# cut_set = cut_set.compute_and_store_features_batch(
# extractor=extractor,
# storage_path=f"{output_dir}/feats_{subset}_{idx}",
# num_workers=args.num_workers,
# batch_duration=args.batch_duration,
# storage_type=LilcomChunkyWriter,
# overwrite=True,
# )
cut_set = cut_set.compute_and_store_features(
extractor=extractor,
storage_path=f"{output_dir}/feats_{subset}_{idx}",
num_workers=args.num_workers,
batch_duration=args.batch_duration,
num_jobs=args.num_workers,
executor=extractor,
storage_type=LilcomChunkyWriter,
overwrite=True,
)
logging.info(f"Saving to {cuts_path}")
cut_set.to_file(cuts_path)

View File

@ -215,7 +215,7 @@ if [ $stage -le 131 ] && [ $stop_stage -ge 131 ]; then
python3 ./local/compute_fbank_wenetspeech_splits.py \
--training-subset L \
--num-workers 8 \
--num-workers 80 \
--batch-duration 1600 \
--start 98 \
--num-mel-bins ${whisper_mel_bins} --whisper-fbank false \