mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 18:24:18 +00:00
Update prepare.sh
This commit is contained in:
parent
204a3b2fb2
commit
750e2ac035
@ -300,7 +300,32 @@ if [ $stage -le 10 ] && [ $stop_stage -ge 10 ]; then
|
|||||||
# it using: pip install kaldilm
|
# it using: pip install kaldilm
|
||||||
|
|
||||||
if [ $lang == "yue" ] || [ $lang == "zh_TW" ] || [ $lang == "zh_CN" ] || [ $lang == "zh_HK" ]; then
|
if [ $lang == "yue" ] || [ $lang == "zh_TW" ] || [ $lang == "zh_CN" ] || [ $lang == "zh_HK" ]; then
|
||||||
echo "TO BE IMPLEMENTED"
|
# Character-level LM preparation for the Chinese/Cantonese locales.
# For each n-gram order this:
#   1. runs ./shared/make_kn_lm.py on transcript_words.txt to write an
#      unpruned ARPA file,
#   2. runs kaldilm on that ARPA file (with words.txt as symbol table) and
#      stores its stdout as G_<order>_gram_char.fst.txt,
#   3. runs ./local/prepare_lang_fst.py with that G file.
# Each step is skipped when the file its guard tests already exists.
lang_dir="data/${lang}/lang_char"
mkdir -p "${lang_dir}/lm"

for ngram in 3; do
  # Single source of truth for the paths shared by the guards and the
  # commands, so they cannot drift apart.
  arpa="${lang_dir}/lm/${ngram}gram.unpruned.arpa"
  g_fst_txt="${lang_dir}/lm/G_${ngram}_gram_char.fst.txt"

  # The guard must test the same path that make_kn_lm.py writes and kaldilm
  # reads. It previously tested "<order>-gram.unpruned.arpa", which is never
  # created, so the LM was re-estimated on every run.
  if [ ! -f "${arpa}" ]; then
    ./shared/make_kn_lm.py \
      -ngram-order "${ngram}" \
      -text "${lang_dir}/transcript_words.txt" \
      -lm "${arpa}"
  fi

  if [ ! -f "${g_fst_txt}" ]; then
    python3 -m kaldilm \
      --read-symbol-table="${lang_dir}/words.txt" \
      --disambig-symbol='#0' \
      --max-order="${ngram}" \
      "${arpa}" \
      > "${g_fst_txt}"
  fi

  # NOTE(review): this guard tests lm/HLG.fst, whereas the Stage 11 guard
  # tests lm/HLG_<order>.fst — confirm which file prepare_lang_fst.py is
  # expected to produce.
  if [ ! -f "${lang_dir}/lm/HLG.fst" ]; then
    ./local/prepare_lang_fst.py \
      --lang-dir "${lang_dir}" \
      --ngram-G "${g_fst_txt}"
  fi
done
|
||||||
else
|
else
|
||||||
for vocab_size in ${vocab_sizes[@]}; do
|
for vocab_size in ${vocab_sizes[@]}; do
|
||||||
lang_dir=data/${lang}/lang_bpe_${vocab_size}
|
lang_dir=data/${lang}/lang_bpe_${vocab_size}
|
||||||
@ -329,6 +354,14 @@ fi
|
|||||||
if [ $stage -le 11 ] && [ $stop_stage -ge 11 ]; then
|
if [ $stage -le 11 ] && [ $stop_stage -ge 11 ]; then
|
||||||
log "Stage 11: Compile HLG"
|
log "Stage 11: Compile HLG"
|
||||||
|
|
||||||
|
if [ $lang == "yue" ] || [ $lang == "zh_TW" ] || [ $lang == "zh_CN" ] || [ $lang == "zh_HK" ]; then
|
||||||
|
# Character-level locales: for each n-gram order, run compile_hlg.py with
# --lm G_<order>_gram_char unless lm/HLG_<order>.fst already exists.
# Expansions are quoted so paths containing whitespace or glob characters
# are passed through as single arguments.
lang_dir="data/${lang}/lang_char"
for ngram in 3; do
  if [ ! -f "${lang_dir}/lm/HLG_${ngram}.fst" ]; then
    ./local/compile_hlg.py --lang-dir "${lang_dir}" --lm "G_${ngram}_gram_char"
  fi
done
|
||||||
|
else
|
||||||
for vocab_size in ${vocab_sizes[@]}; do
|
for vocab_size in ${vocab_sizes[@]}; do
|
||||||
lang_dir=data/${lang}/lang_bpe_${vocab_size}
|
lang_dir=data/${lang}/lang_bpe_${vocab_size}
|
||||||
./local/compile_hlg.py --lang-dir $lang_dir
|
./local/compile_hlg.py --lang-dir $lang_dir
|
||||||
@ -339,13 +372,23 @@ if [ $stage -le 11 ] && [ $stop_stage -ge 11 ]; then
|
|||||||
# ./local/compile_hlg_using_openfst.py --lang-dir $lang_dir
|
# ./local/compile_hlg_using_openfst.py --lang-dir $lang_dir
|
||||||
done
|
done
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Compile LG for RNN-T fast_beam_search decoding
#
# Runs when 12 lies within [stage, stop_stage].
#   - yue / zh_TW / zh_CN / zh_HK: uses data/<lang>/lang_char and, per n-gram
#     order, runs compile_lg.py with --lm G_<order>_gram_char unless
#     lm/LG_<order>.fst already exists.
#   - any other language: runs compile_lg.py once per entry of vocab_sizes on
#     data/<lang>/lang_bpe_<vocab_size>.
# Variables are quoted so that an empty value makes a test evaluate to false
# instead of raising a "unary operator expected" error from `[`.
if [ "$stage" -le 12 ] && [ "$stop_stage" -ge 12 ]; then
  log "Stage 12: Compile LG"

  if [ "$lang" == "yue" ] || [ "$lang" == "zh_TW" ] || [ "$lang" == "zh_CN" ] || [ "$lang" == "zh_HK" ]; then
    lang_dir="data/${lang}/lang_char"
    for ngram in 3; do
      if [ ! -f "${lang_dir}/lm/LG_${ngram}.fst" ]; then
        ./local/compile_lg.py --lang-dir "${lang_dir}" --lm "G_${ngram}_gram_char"
      fi
    done
  else
    for vocab_size in "${vocab_sizes[@]}"; do
      lang_dir="data/${lang}/lang_bpe_${vocab_size}"
      ./local/compile_lg.py --lang-dir "${lang_dir}"
    done
  fi
fi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user