diff --git a/egs/wenetspeech/ASR/RESULTS.md b/egs/wenetspeech/ASR/RESULTS.md
index 8ba40d5aa..3eb1e4a2a 100644
--- a/egs/wenetspeech/ASR/RESULTS.md
+++ b/egs/wenetspeech/ASR/RESULTS.md
@@ -15,7 +15,7 @@ When training with the L subset, the WERs are
 | fast beam search (1best) | 7.94 | 8.74 | 13.80 | --epoch 10, --avg 2, --max-duration 1500 |
 | fast beam search (nbest) | 9.82 | 10.98 | 16.37 | --epoch 10, --avg 2, --max-duration 600 |
 | fast beam search (nbest oracle) | 6.88 | 7.18 | 11.77 | --epoch 10, --avg 2, --max-duration 600 |
-| fast beam search (nbest LG) | 14.94 | 16.14 | 22.93 | --epoch 10, --avg 2, --max-duration 600 |
+| fast beam search (nbest LG, ngram_lm_scale=0.35) | 8.83 | 9.88 | 15.47 | --epoch 10, --avg 2, --max-duration 600 |
 
 The training command for reproducing is given below:
 
@@ -110,6 +110,7 @@ avg=2
         --lang-dir data/lang_char \
         --max-duration 600 \
         --decoding-method fast_beam_search_nbest_LG \
+        --ngram-lm-scale 0.35 \
         --beam 20.0 \
         --max-contexts 8 \
         --max-states 64
diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py
index b45d7b971..b9eec9c9f 100755
--- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py
+++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/decode.py
@@ -225,7 +225,7 @@ def get_parser():
     parser.add_argument(
         "--ngram-lm-scale",
         type=float,
-        default=0.01,
+        default=0.07,
         help="""
         Used only when --decoding_method is fast_beam_search_nbest_LG.
         It specifies the scale for n-gram LM scores.
@@ -590,6 +590,8 @@ def main():
         params.suffix += f"-beam-{params.beam}"
         params.suffix += f"-max-contexts-{params.max_contexts}"
         params.suffix += f"-max-states-{params.max_states}"
+        if params.decoding_method == "fast_beam_search_nbest_LG":
+            params.suffix += f"-ngram-lm-scale-{params.ngram_lm_scale}"
     elif "beam_search" in params.decoding_method:
         params.suffix += f"-beam-{params.beam_size}"
     else: