Remove duplicated token seq in rescoring. (#108)

* Remove duplicated token seq in rescoring.

* Use a larger range for ngram_lm_scale and attention_scale
Fangjun Kuang, 2021-11-06 08:54:45 +08:00, committed by GitHub
parent 810b193dcc
commit 91cfecebf2

@@ -224,6 +224,7 @@ class Nbest(object):
     else:
         word_seq = lattice.aux_labels.index(path)
         word_seq = word_seq.remove_axis(word_seq.num_axes - 2)
+    word_seq = word_seq.remove_values_leq(0)
 
     # Each utterance has `num_paths` paths but some of them transduces
     # to the same word sequence, so we need to remove repeated word
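
The added `remove_values_leq(0)` is what makes the deduplication mentioned in the comment effective: 0 is the epsilon label (and -1 the final-arc label) in the lattice, so two paths spelling the same words can still differ in such padding and would otherwise survive as "distinct" sequences. A minimal sketch of the effect, assuming k2's RaggedTensor API; the values here are made up:

import k2

# Two n-best paths over the same words [5, 9]; they differ only in
# epsilon (0) and final-arc (-1) padding, so a later uniqueness check
# would wrongly keep both.
word_seq = k2.RaggedTensor([[5, 0, 9, -1], [5, 9, 0, -1]])

# Dropping every value <= 0 normalizes both rows to [5, 9], so the
# subsequent deduplication recognizes them as the same word sequence.
word_seq = word_seq.remove_values_leq(0)
print(word_seq)  # [[5 9] [5 9]]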
@@ -870,6 +871,7 @@ def rescore_with_attention_decoder(
         ngram_lm_scale_list = [0.01, 0.05, 0.08]
         ngram_lm_scale_list += [0.1, 0.3, 0.5, 0.6, 0.7, 0.9, 1.0]
         ngram_lm_scale_list += [1.1, 1.2, 1.3, 1.5, 1.7, 1.9, 2.0]
+        ngram_lm_scale_list += [2.1, 2.2, 2.3, 2.5, 3.0, 4.0, 5.0]
     else:
         ngram_lm_scale_list = [ngram_lm_scale]
@@ -877,6 +879,7 @@ def rescore_with_attention_decoder(
         attention_scale_list = [0.01, 0.05, 0.08]
         attention_scale_list += [0.1, 0.3, 0.5, 0.6, 0.7, 0.9, 1.0]
         attention_scale_list += [1.1, 1.2, 1.3, 1.5, 1.7, 1.9, 2.0]
+        attention_scale_list += [2.1, 2.2, 2.3, 2.5, 3.0, 4.0, 5.0]
     else:
         attention_scale_list = [attention_scale]
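
These extended lists feed a brute-force grid search: for every (ngram_lm_scale, attention_scale) pair the n-best paths are re-ranked and the best path per utterance is kept, so widening the range simply tries giving the n-gram LM and the attention decoder more weight. A rough sketch of the scoring step for a single utterance, assuming the three scores are 1-D torch tensors indexed by path (the function name is illustrative, not the module's real API):

import torch

def best_path_index(am_scores: torch.Tensor,
                    ngram_lm_scores: torch.Tensor,
                    attention_scores: torch.Tensor,
                    ngram_lm_scale: float,
                    attention_scale: float) -> int:
    # Linearly combine the per-path scores; larger scales give the
    # n-gram LM / attention decoder more influence relative to the
    # acoustic model.
    tot_scores = (
        am_scores
        + ngram_lm_scale * ngram_lm_scores
        + attention_scale * attention_scores
    )
    # The path with the highest combined score wins for this utterance.
    return int(torch.argmax(tot_scores))

Since the three score tensors are computed once and each scale pair is just another linear combination of them, adding values up to 5.0 costs only extra argmax passes, not extra forward passes through the models.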