From 91cfecebf20ea7cff3f10eac43a7394f2a624513 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 6 Nov 2021 08:54:45 +0800 Subject: [PATCH] Remove duplicated token seq in rescoring. (#108) * Remove duplicated token seq in rescoring. * Use a larger range for ngram_lm_scale and attention_scale --- icefall/decode.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/icefall/decode.py b/icefall/decode.py index 619b3267a..d11920618 100644 --- a/icefall/decode.py +++ b/icefall/decode.py @@ -224,6 +224,7 @@ class Nbest(object): else: word_seq = lattice.aux_labels.index(path) word_seq = word_seq.remove_axis(word_seq.num_axes - 2) + word_seq = word_seq.remove_values_leq(0) # Each utterance has `num_paths` paths but some of them transduces # to the same word sequence, so we need to remove repeated word @@ -870,6 +871,7 @@ def rescore_with_attention_decoder( ngram_lm_scale_list = [0.01, 0.05, 0.08] ngram_lm_scale_list += [0.1, 0.3, 0.5, 0.6, 0.7, 0.9, 1.0] ngram_lm_scale_list += [1.1, 1.2, 1.3, 1.5, 1.7, 1.9, 2.0] + ngram_lm_scale_list += [2.1, 2.2, 2.3, 2.5, 3.0, 4.0, 5.0] else: ngram_lm_scale_list = [ngram_lm_scale] @@ -877,6 +879,7 @@ def rescore_with_attention_decoder( attention_scale_list = [0.01, 0.05, 0.08] attention_scale_list += [0.1, 0.3, 0.5, 0.6, 0.7, 0.9, 1.0] attention_scale_list += [1.1, 1.2, 1.3, 1.5, 1.7, 1.9, 2.0] + attention_scale_list += [2.1, 2.2, 2.3, 2.5, 3.0, 4.0, 5.0] else: attention_scale_list = [attention_scale]