From 00f8371f373805031fee8ab00ab8e1d64a8e1d78 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Sat, 24 Jul 2021 18:24:04 +0800
Subject: [PATCH] begin to add LM rescoring.

---
 .github/workflows/test.yml     |  2 +-
 egs/librispeech/ASR/prepare.sh | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f808bffbb..7da954790 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,7 +30,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-18.04, macos-10.15]
-        python-version: [3.7, 3.9]
+        python-version: [3.6, 3.7, 3.8, 3.9]
         torch: ["1.8.1"]
         k2-version: ["1.2.dev20210723"]
       fail-fast: false
diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh
index 3f827b223..87786c5c8 100755
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@@ -93,13 +93,23 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
   # We assume you have install kaldilm, if not, please install
   # it using: pip install kaldilm
 
-  if [ ! -e data/lm/G_3_gram.fst.txt ]; then
+  if [ ! -f data/lm/G_3_gram.fst.txt ]; then
+    # It is used in building HLG
     python3 -m kaldilm \
       --read-symbol-table="data/lang/words.txt" \
       --disambig-symbol='#0' \
       --max-order=3 \
       data/lm/3-gram.pruned.1e-7.arpa > data/lm/G_3_gram.fst.txt
   fi
+
+  if [ ! -f data/lm/G_4_gram.fst.txt ]; then
+    # It is used for LM rescoring
+    python3 -m kaldilm \
+      --read-symbol-table="data/lang/words.txt" \
+      --disambig-symbol='#0' \
+      --max-order=4 \
+      data/lm/4-gram.arpa > data/lm/G_4_gram.fst.txt
+  fi
 fi
 
 if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then