small fixes to prepare.sh (#789)

This commit is contained in:
Fangjun Kuang 2022-12-27 09:25:42 +08:00 committed by GitHub
parent 4e249da2c4
commit dfbcf606e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -123,10 +123,12 @@ if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
     touch data/fbank/.librispeech.done
   fi
 
-  cat <(gunzip -c data/fbank/librispeech_cuts_train-clean-100.jsonl.gz) \
-    <(gunzip -c data/fbank/librispeech_cuts_train-clean-360.jsonl.gz) \
-    <(gunzip -c data/fbank/librispeech_cuts_train-other-500.jsonl.gz) | \
-    shuf | gzip -c > data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz
+  if [ ! -f data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz ]; then
+    cat <(gunzip -c data/fbank/librispeech_cuts_train-clean-100.jsonl.gz) \
+      <(gunzip -c data/fbank/librispeech_cuts_train-clean-360.jsonl.gz) \
+      <(gunzip -c data/fbank/librispeech_cuts_train-other-500.jsonl.gz) | \
+      shuf | gzip -c > data/fbank/librispeech_cuts_train-all-shuf.jsonl.gz
+  fi
 
   if [ ! -e data/fbank/.librispeech-validated.done ]; then
     log "Validating data/fbank for LibriSpeech"
@@ -244,7 +246,7 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
 fi
 
 if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
-  log "Stage 7: Prepare bigram P"
+  log "Stage 7: Prepare bigram token-level P for MMI training"
 
   for vocab_size in ${vocab_sizes[@]}; do
     lang_dir=data/lang_bpe_${vocab_size}