generate train-all-shuf for whisper fbank

This commit is contained in:
marcoyang 2024-03-28 15:56:04 +08:00
parent 76e0d59267
commit eb685364df

View File

@ -249,7 +249,7 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
log "Stage 7: Prepare whisper fbank feature"
perturb_speed=1
whisper_mel_bins=80
output_dir=data/fbank_whisper_${whisper_mel_bins}D
output_dir=data/fbank_whisper_${whisper_mel_bins}D_hdf5
if [ ! -f $output_dir/.librispeech.whisper.done ]; then
mkdir -p $output_dir
./local/compute_fbank_librispeech.py \
@ -262,4 +262,10 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
--output-dir $output_dir
touch $output_dir/.librispeech.whisper.done
fi
# Merge the three LibriSpeech training-subset cut manifests found in
# ${output_dir} into a single line-shuffled, gzip-compressed manifest.
# The step is skipped when the merged manifest already exists.
train_all_shuf="${output_dir}/librispeech_cuts_train-all-shuf.jsonl.gz"
if [ ! -f "${train_all_shuf}" ]; then
  # Refuse to build a partial "train-all" manifest: every subset must exist.
  for subset in train-clean-100 train-clean-360 train-other-500; do
    if [ ! -f "${output_dir}/librispeech_cuts_${subset}.jsonl.gz" ]; then
      echo "$0: missing ${output_dir}/librispeech_cuts_${subset}.jsonl.gz" >&2
      exit 1
    fi
  done

  # Write to a temporary name and rename only after the whole pipeline
  # succeeded. Otherwise an interrupted or failed run would leave a truncated
  # file behind that the existence check above treats as finished.
  # The subshell keeps "pipefail" from leaking into the rest of the script.
  if (
    set -o pipefail
    gunzip -c -- \
      "${output_dir}/librispeech_cuts_train-clean-100.jsonl.gz" \
      "${output_dir}/librispeech_cuts_train-clean-360.jsonl.gz" \
      "${output_dir}/librispeech_cuts_train-other-500.jsonl.gz" \
      | shuf | gzip -c > "${train_all_shuf}.tmp"
  ); then
    mv -- "${train_all_shuf}.tmp" "${train_all_shuf}"
  else
    rm -f -- "${train_all_shuf}.tmp"
    echo "$0: failed to create ${train_all_shuf}" >&2
    exit 1
  fi
fi
fi