diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh
index 9f9048a6d..81fe43d84 100755
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@@ -249,7 +249,7 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
   log "Stage 7: Prepare whisper fbank feature"
   perturb_speed=1
   whisper_mel_bins=80
-  output_dir=data/fbank_whisper_${whisper_mel_bins}D
+  output_dir=data/fbank_whisper_${whisper_mel_bins}D_hdf5
   if [ ! -f $output_dir/.librispeech.whisper.done ]; then
     mkdir -p $output_dir
     ./local/compute_fbank_librispeech.py \
@@ -262,4 +262,23 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
       --output-dir $output_dir
     touch $output_dir/.librispeech.whisper.done
   fi
+  # Merge the three train-* cut manifests into a single shuffled manifest.
+  # The result is built under a temporary name and renamed only on success,
+  # so a failed or interrupted run never leaves a partial file that would
+  # satisfy the existence guard on the next invocation.
+  train_all_shuf=${output_dir}/librispeech_cuts_train-all-shuf.jsonl.gz
+  if [ ! -f "$train_all_shuf" ]; then
+    if ! (
+      # pipefail is scoped to this subshell: a gunzip error (missing or
+      # corrupt input) must fail the whole pipeline instead of being hidden.
+      set -o pipefail
+      gunzip -c \
+        "${output_dir}/librispeech_cuts_train-clean-100.jsonl.gz" \
+        "${output_dir}/librispeech_cuts_train-clean-360.jsonl.gz" \
+        "${output_dir}/librispeech_cuts_train-other-500.jsonl.gz" |
+        shuf | gzip -c > "${train_all_shuf}.tmp"
+    ); then
+      rm -f "${train_all_shuf}.tmp"
+      log "Failed to build $train_all_shuf"
+      exit 1
+    fi
+    mv "${train_all_shuf}.tmp" "$train_all_shuf"
+  fi
 fi