fixes for data preparation (#1307)

Issue: #1306
This commit is contained in:
zr_jin 2023-10-12 17:05:41 +08:00 committed by GitHub
parent 855492156a
commit 162ceaf4b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 8 deletions

View File

@ -204,10 +204,6 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
if [ ! -f $lang_char_dir/L_disambig.pt ]; then
./local/prepare_char.py --lang-dir $lang_char_dir
fi
if [ ! -f $lang_char_dir/HLG.fst ]; then
./local/prepare_lang_fst.py --lang-dir $lang_phone_dir --ngram-G ./data/lm/G_3_gram.fst.txt
fi
fi
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
@ -262,6 +258,13 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
--max-order=3 \
data/lm/3-gram.unpruned.arpa > data/lm/G_3_gram_char.fst.txt
fi
if [ ! -f $lang_char_dir/HLG.fst ]; then
lang_phone_dir=data/lang_phone
./local/prepare_lang_fst.py \
--lang-dir $lang_phone_dir \
--ngram-G ./data/lm/G_3_gram.fst.txt
fi
fi
if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then

View File

@ -242,10 +242,6 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
$lang_dir/L_disambig.pt \
$lang_dir/L_disambig.fst
fi
if [ ! -f $lang_dir/HL.fst ]; then
./local/prepare_lang_fst.py --lang-dir $lang_dir --ngram-G ./data/lm/G_3_gram.fst.txt
fi
done
fi
@ -303,6 +299,16 @@ if [ $stage -le 8 ] && [ $stop_stage -ge 8 ]; then
--max-order=4 \
$dl_dir/lm/4-gram.arpa > data/lm/G_4_gram.fst.txt
fi
for vocab_size in ${vocab_sizes[@]}; do
lang_dir=data/lang_bpe_${vocab_size}
if [ ! -f $lang_dir/HL.fst ]; then
./local/prepare_lang_fst.py \
--lang-dir $lang_dir \
--ngram-G ./data/lm/G_3_gram.fst.txt
fi
done
fi
if [ $stage -le 9 ] && [ $stop_stage -ge 9 ]; then