From 810b193dcc3ad3f7a65bc3def63493711c9a084e Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Wed, 3 Nov 2021 07:16:49 +0800
Subject: [PATCH 1/3] Clarify the doc about ctc-decoding. (#104)

---
 .../recipes/librispeech/conformer_ctc.rst | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/docs/source/recipes/librispeech/conformer_ctc.rst b/docs/source/recipes/librispeech/conformer_ctc.rst
index 57ac246e1..0c3c76c23 100644
--- a/docs/source/recipes/librispeech/conformer_ctc.rst
+++ b/docs/source/recipes/librispeech/conformer_ctc.rst
@@ -303,6 +303,10 @@ The commonly used options are:
 
   $ cd egs/librispeech/ASR
   $ ./conformer_ctc/decode.py --method ctc-decoding --max-duration 300
+    # Caution: The above command is tested with a model with vocab size 500.
+    # The default settings in the master will not work.
+    # Please see https://github.com/k2-fsa/icefall/issues/103
+    # We will fix it later and delete this note.
 
 And the following command uses attention decoder for rescoring:
 
@@ -328,6 +332,8 @@ Usage:
 .. code-block:: bash
 
   $ cd egs/librispeech/ASR
+  # NOTE: Tested with a model with vocab size 500.
+  # It won't work for a model with vocab size 5000.
   $ ./conformer_ctc/decode.py \
     --epoch 25 \
     --avg 1 \
@@ -399,7 +405,7 @@ Download the pre-trained model
 
 The following commands describe how to download the pre-trained model:
 
-.. code-block::
+.. code-block:: bash
 
   $ cd egs/librispeech/ASR
   $ mkdir tmp
@@ -410,10 +416,23 @@ The following commands describe how to download the pre-trained model:
 .. CAUTION::
 
   You have to use ``git lfs`` to download the pre-trained model.
+  Otherwise, you will have the following issue when running ``decode.py``:
+
+    .. code-block::
+
+      _pickle.UnpicklingError: invalid load key, 'v'
+
+  To fix that issue, please use:
+
+    .. code-block:: bash
+
+      cd icefall_asr_librispeech_conformer_ctc
+      git lfs pull
+
 
 .. CAUTION::
 
-  In order to use this pre-trained model, your k2 version has to be v1.7 or later.
+  In order to use this pre-trained model, your k2 version has to be v1.9 or later.
 
 After downloading, you will have the following files:
 

From 91cfecebf20ea7cff3f10eac43a7394f2a624513 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Sat, 6 Nov 2021 08:54:45 +0800
Subject: [PATCH 2/3] Remove duplicated token seq in rescoring. (#108)

* Remove duplicated token seq in rescoring.
* Use a larger range for ngram_lm_scale and attention_scale
---
 icefall/decode.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/icefall/decode.py b/icefall/decode.py
index 619b3267a..d11920618 100644
--- a/icefall/decode.py
+++ b/icefall/decode.py
@@ -224,6 +224,7 @@ class Nbest(object):
         else:
             word_seq = lattice.aux_labels.index(path)
             word_seq = word_seq.remove_axis(word_seq.num_axes - 2)
+        word_seq = word_seq.remove_values_leq(0)
 
         # Each utterance has `num_paths` paths but some of them transduces
         # to the same word sequence, so we need to remove repeated word
@@ -870,6 +871,7 @@ def rescore_with_attention_decoder(
         ngram_lm_scale_list = [0.01, 0.05, 0.08]
         ngram_lm_scale_list += [0.1, 0.3, 0.5, 0.6, 0.7, 0.9, 1.0]
         ngram_lm_scale_list += [1.1, 1.2, 1.3, 1.5, 1.7, 1.9, 2.0]
+        ngram_lm_scale_list += [2.1, 2.2, 2.3, 2.5, 3.0, 4.0, 5.0]
     else:
         ngram_lm_scale_list = [ngram_lm_scale]
 
@@ -877,6 +879,7 @@
         attention_scale_list = [0.01, 0.05, 0.08]
         attention_scale_list += [0.1, 0.3, 0.5, 0.6, 0.7, 0.9, 1.0]
         attention_scale_list += [1.1, 1.2, 1.3, 1.5, 1.7, 1.9, 2.0]
+        attention_scale_list += [2.1, 2.2, 2.3, 2.5, 3.0, 4.0, 5.0]
     else:
         attention_scale_list = [attention_scale]
 

From 04029871b6a54e35d08116917f88eb7d6ead2d02 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Tue, 9 Nov 2021 13:44:51 +0800
Subject: [PATCH 3/3] Fix a bug in Nbest.compute_am_scores and Nbest.compute_lm_scores. (#111)

---
 icefall/decode.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/icefall/decode.py b/icefall/decode.py
index d11920618..8b7bdd27f 100644
--- a/icefall/decode.py
+++ b/icefall/decode.py
@@ -364,7 +364,13 @@ class Nbest(object):
           Return a ragged tensor with 2 axes [utt][path_scores].
           Its dtype is torch.float64.
         """
-        saved_scores = self.fsa.scores
+        # Caution: We need a clone here. `self.fsa.scores` is a
+        # reference to a tensor representing the last field of an arc
+        # in the FSA (Remember that an arc has four fields.) If we later assign
+        # `self.fsa.scores`, it will also change the scores on every arc, which
+        # means saved_scores will also be changed if we don't use `clone()`
+        # here.
+        saved_scores = self.fsa.scores.clone()
 
         # The `scores` of every arc consists of `am_scores` and `lm_scores`
         self.fsa.scores = self.fsa.scores - self.fsa.lm_scores
@@ -391,10 +397,10 @@ class Nbest(object):
           Return a ragged tensor with 2 axes [utt][path_scores].
           Its dtype is torch.float64.
         """
-        saved_scores = self.fsa.scores
+        saved_scores = self.fsa.scores.clone()
 
         # The `scores` of every arc consists of `am_scores` and `lm_scores`
-        self.fsa.scores = self.fsa.lm_scores
+        self.fsa.scores = self.fsa.lm_scores.clone()
 
         lm_scores = self.fsa.get_tot_scores(
             use_double_scores=True, log_semiring=False