From dfbcf606e7a7798bc5d9f73da82126914800be0e Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 27 Dec 2022 09:25:42 +0800 Subject: [PATCH] small fixes to prepare.sh (#789) --- egs/librispeech/ASR/prepare.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh index 59bed8389..b1d207049 100755 --- a/egs/librispeech/ASR/prepare.sh +++ b/egs/librispeech/ASR/prepare.sh @@ -123,10 +123,12 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then touch data/fbank/.librispeech.done fi - cat <(gunzip -c data/fbank/librispeech_cuts_train-clean-100.jsonl.gz) \ - <(gunzip -c data/fbank/librispeech_cuts_train-clean-360.jsonl.gz) \ - <(gunzip -c data/fbank/librispeech_cuts_train-other-500.jsonl.gz) | \ - shuf | gzip -c > data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz + if [ ! -f data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz ]; then + cat <(gunzip -c data/fbank/librispeech_cuts_train-clean-100.jsonl.gz) \ + <(gunzip -c data/fbank/librispeech_cuts_train-clean-360.jsonl.gz) \ + <(gunzip -c data/fbank/librispeech_cuts_train-other-500.jsonl.gz) | \ + shuf | gzip -c > data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz + fi if [ ! -e data/fbank/.librispeech-validated.done ]; then log "Validating data/fbank for LibriSpeech" @@ -244,7 +246,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then fi if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then - log "Stage 7: Prepare bigram P" + log "Stage 7: Prepare bigram token-level P for MMI training" for vocab_size in ${vocab_sizes[@]}; do lang_dir=data/lang_bpe_${vocab_size}