This commit is contained in:
Piotr Żelasko 2022-01-15 05:03:45 +00:00
parent 3582599a33
commit 186f5f1ba4

View File

@@ -181,7 +181,7 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
# Add special words to words.txt # Add special words to words.txt
echo "<eps> 0" > $lang_dir/words.txt echo "<eps> 0" > $lang_dir/words.txt
echo "!SIL 1" >> $lang_dir/words.txt echo "!SIL 1" >> $lang_dir/words.txt
echo "<UNK> 2" >> $lang_dir/words.txt echo "[UNK] 2" >> $lang_dir/words.txt
# Add regular words to words.txt # Add regular words to words.txt
gunzip -c data/manifests/fisher-swbd_supervisions_norm.jsonl.gz \ gunzip -c data/manifests/fisher-swbd_supervisions_norm.jsonl.gz \
@@ -195,9 +195,11 @@ if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
# Add remaining special word symbols expected by LM scripts. # Add remaining special word symbols expected by LM scripts.
num_words=$(wc -l $lang_dir/words.txt) num_words=$(wc -l $lang_dir/words.txt)
echo "<s> $((num_words))" echo "<s> ${num_words}" >> $lang_dir/words.txt
echo "</s> $((num_words+1))" num_words=$(wc -l $lang_dir/words.txt)
echo "#0 $((num_words+2))" echo "</s> ${num_words}" >> $lang_dir/words.txt
num_words=$(wc -l $lang_dir/words.txt)
echo "#0 ${num_words}" >> $lang_dir/words.txt
if [ ! -f $lang_dir/L_disambig.pt ]; then if [ ! -f $lang_dir/L_disambig.pt ]; then
pip install g2p_en pip install g2p_en