From 39bc8cae94cb3b5824a93b5033136fba546322b9 Mon Sep 17 00:00:00 2001
From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com>
Date: Wed, 13 Oct 2021 12:20:16 +0800
Subject: [PATCH] Add ctc decoding to pretrained.py on conformer_ctc (#75)

* Add ctc-decoding to pretrained.py

* Update pretrained.py and conformer_ctc.rst

* Update ctc-decoding for pretrained.py on conformer_ctc

* Update pretrained.py

* Fix the style issue

* Update conformer_ctc.rst

* Update the running logs
---
 .../recipes/librispeech/conformer_ctc.rst     | 119 +++++++----
 .../ASR/conformer_ctc/pretrained.py           | 202 +++++++++++-------
 2 files changed, 211 insertions(+), 110 deletions(-)

diff --git a/docs/source/recipes/librispeech/conformer_ctc.rst b/docs/source/recipes/librispeech/conformer_ctc.rst
index 84e99306f..45ad79313 100644
--- a/docs/source/recipes/librispeech/conformer_ctc.rst
+++ b/docs/source/recipes/librispeech/conformer_ctc.rst
@@ -429,6 +429,7 @@ After downloading, you will have the following files:
       |-- README.md
       |-- data
       |   |-- lang_bpe
+      |   |   |-- Linv.pt
       |   |   |-- HLG.pt
       |   |   |-- bpe.model
       |   |   |-- tokens.txt
@@ -446,6 +447,9 @@ After downloading, you will have the following files:
   6 directories, 11 files
 
 **File descriptions**:
+  - ``data/lang_bpe/Linv.pt``
+
+      It is the inverted lexicon, with word IDs as labels and token IDs as aux_labels.
 
   - ``data/lang_bpe/HLG.pt``
 
@@ -527,12 +531,58 @@ Usage
 
 displays the help information.
 
-It supports three decoding methods:
+It supports 4 decoding methods:
 
+  - CTC decoding
   - HLG decoding
   - HLG + n-gram LM rescoring
   - HLG + n-gram LM rescoring + attention decoder rescoring
 
+CTC decoding
+^^^^^^^^^^^^
+
+CTC decoding uses the best path of the decoding lattice as the decoding result.
+The decoding graph is a plain CTC topology, built without any LM or lexicon.
+
+The command to run CTC decoding is:
+
+.. code-block:: bash
+
+  $ cd egs/librispeech/ASR
+  $ ./conformer_ctc/pretrained.py \
+    --checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
+    --lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
+    --method ctc-decoding \
+    ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac \
+    ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac \
+    ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac
+
+The output is given below:
+
+.. code-block::
+
+  2021-10-13 11:21:50,896 INFO [pretrained.py:236] device: cuda:0
+  2021-10-13 11:21:50,896 INFO [pretrained.py:238] Creating model
+  2021-10-13 11:21:56,669 INFO [pretrained.py:255] Constructing Fbank computer
+  2021-10-13 11:21:56,670 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
+  2021-10-13 11:21:56,683 INFO [pretrained.py:271] Decoding started
+  2021-10-13 11:21:57,341 INFO [pretrained.py:290] Building CTC topology
+  2021-10-13 11:21:57,625 INFO [lexicon.py:113] Loading pre-compiled tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/Linv.pt
+  2021-10-13 11:21:57,679 INFO [pretrained.py:299] Loading BPE model
+  2021-10-13 11:22:00,076 INFO [pretrained.py:314] Use CTC decoding
+  2021-10-13 11:22:00,087 INFO [pretrained.py:400]
+  ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
+  AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
+
+  ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac:
+  GOD AS A DIRECT CONSEQUENCE OF THE SIN WHICH MAN THUS PUNISHED HAD GIVEN HER A LOVELY CHILD WHOSE PLACE WAS ON THAT SAME DISHONOURED
+  BOSOM TO CONNECT HER PARENT FOR EVER WITH THE RACE AND DESCENT OF MORTALS AND TO BE FINALLY A BLESSED SOUL IN HEAVEN
+
+  ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
+  YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
+
+  2021-10-13 11:22:00,087 INFO [pretrained.py:402] Decoding Done
+
 HLG decoding
 ^^^^^^^^^^^^
 
@@ -545,8 +595,7 @@ The command to run HLG decoding is:
   $ cd egs/librispeech/ASR
   $ ./conformer_ctc/pretrained.py \
     --checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
-    --words-file ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/words.txt \
-    --HLG ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt \
+    --lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
     ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac \
     ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac \
     ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac
@@ -555,14 +604,14 @@ The output is given below:
 
 .. code-block::
 
-  2021-08-20 11:03:05,712 INFO [pretrained.py:217] device: cuda:0
-  2021-08-20 11:03:05,712 INFO [pretrained.py:219] Creating model
-  2021-08-20 11:03:11,345 INFO [pretrained.py:238] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
-  2021-08-20 11:03:18,442 INFO [pretrained.py:255] Constructing Fbank computer
-  2021-08-20 11:03:18,444 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
-  2021-08-20 11:03:18,507 INFO [pretrained.py:271] Decoding started
-  2021-08-20 11:03:18,795 INFO [pretrained.py:300] Use HLG decoding
-  2021-08-20 11:03:19,149 INFO [pretrained.py:339]
+  2021-10-13 11:25:19,458 INFO [pretrained.py:236] device: cuda:0
+  2021-10-13 11:25:19,458 INFO [pretrained.py:238] Creating model
+  2021-10-13 11:25:25,342 INFO [pretrained.py:255] Constructing Fbank computer
+  2021-10-13 11:25:25,343 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
+  2021-10-13 11:25:25,356 INFO [pretrained.py:271] Decoding started
+  2021-10-13 11:25:26,026 INFO [pretrained.py:327] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
+  2021-10-13 11:25:33,735 INFO [pretrained.py:359] Use HLG decoding
+  2021-10-13 11:25:34,013 INFO [pretrained.py:400]
   ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
   AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
 
@@ -573,7 +622,7 @@ The output is given below:
   ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
   YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
 
-  2021-08-20 11:03:19,149 INFO [pretrained.py:341] Decoding Done
+  2021-10-13 11:25:34,014 INFO [pretrained.py:402] Decoding Done
 
 HLG decoding + LM rescoring
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -588,8 +637,7 @@ The command to run HLG decoding + LM rescoring is:
   $ cd egs/librispeech/ASR
   $ ./conformer_ctc/pretrained.py \
     --checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
-    --words-file ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/words.txt \
-    --HLG ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt \
+    --lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
     --method whole-lattice-rescoring \
     --G ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt \
     --ngram-lm-scale 0.8 \
@@ -601,15 +649,15 @@ Its output is:
 
 .. code-block::
 
-  2021-08-20 11:12:17,565 INFO [pretrained.py:217] device: cuda:0
-  2021-08-20 11:12:17,565 INFO [pretrained.py:219] Creating model
-  2021-08-20 11:12:23,728 INFO [pretrained.py:238] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
-  2021-08-20 11:12:30,035 INFO [pretrained.py:246] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
-  2021-08-20 11:13:10,779 INFO [pretrained.py:255] Constructing Fbank computer
-  2021-08-20 11:13:10,787 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
-  2021-08-20 11:13:10,798 INFO [pretrained.py:271] Decoding started
-  2021-08-20 11:13:11,085 INFO [pretrained.py:305] Use HLG decoding + LM rescoring
-  2021-08-20 11:13:11,736 INFO [pretrained.py:339]
+  2021-10-13 11:28:19,129 INFO [pretrained.py:236] device: cuda:0
+  2021-10-13 11:28:19,129 INFO [pretrained.py:238] Creating model
+  2021-10-13 11:28:23,531 INFO [pretrained.py:255] Constructing Fbank computer
+  2021-10-13 11:28:23,532 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
+  2021-10-13 11:28:23,544 INFO [pretrained.py:271] Decoding started
+  2021-10-13 11:28:24,141 INFO [pretrained.py:327] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
+  2021-10-13 11:28:30,752 INFO [pretrained.py:338] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
+  2021-10-13 11:28:48,308 INFO [pretrained.py:364] Use HLG decoding + LM rescoring
+  2021-10-13 11:28:48,815 INFO [pretrained.py:400]
   ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
   AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
 
@@ -620,7 +668,7 @@ Its output is:
   ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
   YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
 
-  2021-08-20 11:13:11,737 INFO [pretrained.py:341] Decoding Done
+  2021-10-13 11:28:48,815 INFO [pretrained.py:402] Decoding Done
 
 HLG decoding + LM rescoring + attention decoder rescoring
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -636,8 +684,7 @@ The command to run HLG decoding + LM rescoring + attention decoder rescoring is:
   $ cd egs/librispeech/ASR
   $ ./conformer_ctc/pretrained.py \
     --checkpoint ./tmp/icefall_asr_librispeech_conformer_ctc/exp/pretrained.pt \
-    --words-file ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/words.txt \
-    --HLG ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt \
+    --lang-dir ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe \
     --method attention-decoder \
     --G ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt \
     --ngram-lm-scale 1.3 \
@@ -654,15 +701,15 @@ The output is below:
 
 .. code-block::
 
-  2021-08-20 11:19:11,397 INFO [pretrained.py:217] device: cuda:0
-  2021-08-20 11:19:11,397 INFO [pretrained.py:219] Creating model
-  2021-08-20 11:19:17,354 INFO [pretrained.py:238] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
-  2021-08-20 11:19:24,615 INFO [pretrained.py:246] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
-  2021-08-20 11:20:04,576 INFO [pretrained.py:255] Constructing Fbank computer
-  2021-08-20 11:20:04,584 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
-  2021-08-20 11:20:04,595 INFO [pretrained.py:271] Decoding started
-  2021-08-20 11:20:04,854 INFO [pretrained.py:313] Use HLG + LM rescoring + attention decoder rescoring
-  2021-08-20 11:20:05,805 INFO [pretrained.py:339]
+  2021-10-13 11:29:50,106 INFO [pretrained.py:236] device: cuda:0
+  2021-10-13 11:29:50,106 INFO [pretrained.py:238] Creating model
+  2021-10-13 11:29:56,063 INFO [pretrained.py:255] Constructing Fbank computer
+  2021-10-13 11:29:56,063 INFO [pretrained.py:265] Reading sound files: ['./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac']
+  2021-10-13 11:29:56,077 INFO [pretrained.py:271] Decoding started
+  2021-10-13 11:29:56,770 INFO [pretrained.py:327] Loading HLG from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lang_bpe/HLG.pt
+  2021-10-13 11:30:04,023 INFO [pretrained.py:338] Loading G from ./tmp/icefall_asr_librispeech_conformer_ctc/data/lm/G_4_gram.pt
+  2021-10-13 11:30:18,163 INFO [pretrained.py:372] Use HLG + LM rescoring + attention decoder rescoring
+  2021-10-13 11:30:19,367 INFO [pretrained.py:400]
   ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1089-134686-0001.flac:
   AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
 
@@ -673,7 +720,7 @@ The output is below:
   ./tmp/icefall_asr_librispeech_conformer_ctc/test_wavs/1221-135766-0002.flac:
   YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
 
-  2021-08-20 11:20:05,805 INFO [pretrained.py:341] Decoding Done
+  2021-10-13 11:30:19,367 INFO [pretrained.py:402] Decoding Done
 
 Colab notebook
 --------------
diff --git a/egs/librispeech/ASR/conformer_ctc/pretrained.py b/egs/librispeech/ASR/conformer_ctc/pretrained.py
index 00812d674..07d3e7269 100755
--- a/egs/librispeech/ASR/conformer_ctc/pretrained.py
+++ b/egs/librispeech/ASR/conformer_ctc/pretrained.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
-# Copyright      2021  Xiaomi Corp.        (authors: Fangjun Kuang)
+# Copyright      2021  Xiaomi Corp.        (authors: Fangjun Kuang,
+#                                                    Mingshuang Luo)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -19,6 +20,7 @@
 import argparse
 import logging
 import math
+import sentencepiece as spm
 from typing import List
 
 import k2
@@ -28,6 +30,7 @@ import torchaudio
 from conformer import Conformer
 from torch.nn.utils.rnn import pad_sequence
 
+from icefall.lexicon import Lexicon
 from icefall.decode import (
     get_lattice,
     one_best_decoding,
@@ -52,14 +55,10 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--words-file",
+        "--lang-dir",
         type=str,
         required=True,
-        help="Path to words.txt",
-    )
-
-    parser.add_argument(
-        "--HLG", type=str, required=True, help="Path to HLG.pt."
+        help="Path to lang bpe dir.",
     )
 
     parser.add_argument(
@@ -68,6 +67,10 @@ def get_parser():
         default="1best",
         help="""Decoding method.
         Possible values are:
+        (0) ctc-decoding - Use CTC decoding. It uses a sentence
+            piece model, i.e., lang_dir/bpe.model, to convert
+            word pieces to words. It needs neither a lexicon
+            nor an n-gram LM.
         (1) 1best - Use the best path as decoding output. Only
             the transformer encoder output is used for decoding.
             We call it HLG decoding.
@@ -249,23 +252,6 @@ def main():
     model.to(device)
     model.eval()
 
-    logging.info(f"Loading HLG from {params.HLG}")
-    HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
-    HLG = HLG.to(device)
-    if not hasattr(HLG, "lm_scores"):
-        # For whole-lattice-rescoring and attention-decoder
-        HLG.lm_scores = HLG.scores.clone()
-
-    if params.method in ["whole-lattice-rescoring", "attention-decoder"]:
-        logging.info(f"Loading G from {params.G}")
-        G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
-        # Add epsilon self-loops to G as we will compose
-        # it with the whole lattice later
-        G = G.to(device)
-        G = k2.add_epsilon_self_loops(G)
-        G = k2.arc_sort(G)
-        G.lm_scores = G.scores.clone()
-
     logging.info("Constructing Fbank computer")
     opts = kaldifeat.FbankOptions()
     opts.device = device
@@ -299,60 +285,128 @@ def main():
         dtype=torch.int32,
     )
 
-    lattice = get_lattice(
-        nnet_output=nnet_output,
-        decoding_graph=HLG,
-        supervision_segments=supervision_segments,
-        search_beam=params.search_beam,
-        output_beam=params.output_beam,
-        min_active_states=params.min_active_states,
-        max_active_states=params.max_active_states,
-        subsampling_factor=params.subsampling_factor,
-    )
+    try:
+        if params.method == "ctc-decoding":
+            logging.info("Building CTC topology")
+            lexicon = Lexicon(params.lang_dir)
+            max_token_id = max(lexicon.tokens)
+            H = k2.ctc_topo(
+                max_token=max_token_id,
+                modified=False,
+                device=device,
+            )
 
-    if params.method == "1best":
-        logging.info("Use HLG decoding")
-        best_path = one_best_decoding(
-            lattice=lattice, use_double_scores=params.use_double_scores
-        )
-    elif params.method == "whole-lattice-rescoring":
-        logging.info("Use HLG decoding + LM rescoring")
-        best_path_dict = rescore_with_whole_lattice(
-            lattice=lattice,
-            G_with_epsilon_loops=G,
-            lm_scale_list=[params.ngram_lm_scale],
-        )
-        best_path = next(iter(best_path_dict.values()))
-    elif params.method == "attention-decoder":
-        logging.info("Use HLG + LM rescoring + attention decoder rescoring")
-        rescored_lattice = rescore_with_whole_lattice(
-            lattice=lattice, G_with_epsilon_loops=G, lm_scale_list=None
-        )
-        best_path_dict = rescore_with_attention_decoder(
-            lattice=rescored_lattice,
-            num_paths=params.num_paths,
-            model=model,
-            memory=memory,
-            memory_key_padding_mask=memory_key_padding_mask,
-            sos_id=params.sos_id,
-            eos_id=params.eos_id,
-            nbest_scale=params.nbest_scale,
-            ngram_lm_scale=params.ngram_lm_scale,
-            attention_scale=params.attention_decoder_scale,
-        )
-        best_path = next(iter(best_path_dict.values()))
+            logging.info("Loading BPE model")
+            bpe_model = spm.SentencePieceProcessor()
+            bpe_model.load(params.lang_dir + "/bpe.model")
 
-    hyps = get_texts(best_path)
-    word_sym_table = k2.SymbolTable.from_file(params.words_file)
-    hyps = [[word_sym_table[i] for i in ids] for ids in hyps]
+            lattice = get_lattice(
+                nnet_output=nnet_output,
+                decoding_graph=H,
+                supervision_segments=supervision_segments,
+                search_beam=params.search_beam,
+                output_beam=params.output_beam,
+                min_active_states=params.min_active_states,
+                max_active_states=params.max_active_states,
+                subsampling_factor=params.subsampling_factor,
+            )
 
-    s = "\n"
-    for filename, hyp in zip(params.sound_files, hyps):
-        words = " ".join(hyp)
-        s += f"{filename}:\n{words}\n\n"
-    logging.info(s)
+            logging.info("Use CTC decoding")
+            best_path = one_best_decoding(
+                lattice=lattice, use_double_scores=params.use_double_scores
+            )
+            token_ids = get_texts(best_path)
+            hyps = bpe_model.decode(token_ids)
+            hyps = [s.split() for s in hyps]
 
-    logging.info("Decoding Done")
+        if params.method in [
+            "1best",
+            "whole-lattice-rescoring",
+            "attention-decoder",
+        ]:
+            logging.info(f"Loading HLG from {params.lang_dir}/HLG.pt")
+            HLG = k2.Fsa.from_dict(
+                torch.load(params.lang_dir + "/HLG.pt", map_location="cpu")
+            )
+            HLG = HLG.to(device)
+            if not hasattr(HLG, "lm_scores"):
+                # For whole-lattice-rescoring and attention-decoder
+                HLG.lm_scores = HLG.scores.clone()
+
+            if params.method in [
+                "whole-lattice-rescoring",
+                "attention-decoder",
+            ]:
+                logging.info(f"Loading G from {params.G}")
+                G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
+                # Add epsilon self-loops to G as we will compose
+                # it with the whole lattice later
+                G = G.to(device)
+                G = k2.add_epsilon_self_loops(G)
+                G = k2.arc_sort(G)
+                G.lm_scores = G.scores.clone()
+
+            lattice = get_lattice(
+                nnet_output=nnet_output,
+                decoding_graph=HLG,
+                supervision_segments=supervision_segments,
+                search_beam=params.search_beam,
+                output_beam=params.output_beam,
+                min_active_states=params.min_active_states,
+                max_active_states=params.max_active_states,
+                subsampling_factor=params.subsampling_factor,
+            )
+
+            if params.method == "1best":
+                logging.info("Use HLG decoding")
+                best_path = one_best_decoding(
+                    lattice=lattice, use_double_scores=params.use_double_scores
+                )
+            elif params.method == "whole-lattice-rescoring":
+                logging.info("Use HLG decoding + LM rescoring")
+                best_path_dict = rescore_with_whole_lattice(
+                    lattice=lattice,
+                    G_with_epsilon_loops=G,
+                    lm_scale_list=[params.ngram_lm_scale],
+                )
+                best_path = next(iter(best_path_dict.values()))
+            elif params.method == "attention-decoder":
+                logging.info(
+                    "Use HLG + LM rescoring + attention decoder rescoring"
+                )
+                rescored_lattice = rescore_with_whole_lattice(
+                    lattice=lattice, G_with_epsilon_loops=G, lm_scale_list=None
+                )
+                best_path_dict = rescore_with_attention_decoder(
+                    lattice=rescored_lattice,
+                    num_paths=params.num_paths,
+                    model=model,
+                    memory=memory,
+                    memory_key_padding_mask=memory_key_padding_mask,
+                    sos_id=params.sos_id,
+                    eos_id=params.eos_id,
+                    nbest_scale=params.nbest_scale,
+                    ngram_lm_scale=params.ngram_lm_scale,
+                    attention_scale=params.attention_decoder_scale,
+                )
+                best_path = next(iter(best_path_dict.values()))
+
+            hyps = get_texts(best_path)
+            word_sym_table = k2.SymbolTable.from_file(
+                params.lang_dir + "/words.txt"
+            )
+            hyps = [[word_sym_table[i] for i in ids] for ids in hyps]
+
+        s = "\n"
+        for filename, hyp in zip(params.sound_files, hyps):
+            words = " ".join(hyp)
+            s += f"{filename}:\n{words}\n\n"
+        logging.info(s)
+
+        logging.info("Decoding Done")
+
+    except Exception:
+        raise ValueError("Please use a supported decoding method.")
 
 
 if __name__ == "__main__":