update ctc-decoding for pretrained.py on conformer_ctc

2025-08-26 18:24:18 +00:00 · 2021-10-13 00:52:40 +08:00 · 2021-10-13 00:52:40 +08:00 · 524afc02ba
commit 524afc02ba
parent 7fd9d291f3
2 changed files with 114 additions and 104 deletions
--- a/docs/source/recipes/librispeech/conformer_ctc.rst
+++ b/docs/source/recipes/librispeech/conformer_ctc.rst
@@ -448,7 +448,7 @@ After downloading, you will have the following files:
 **File descriptions**:
  - ``data/lang_bpe/Linv.pt``

-      It is the lexicon file.
+      It is the lexicon file, with word IDs as labels and token IDs as aux_labels.

  - ``data/lang_bpe/HLG.pt``

@@ -530,7 +530,7 @@ Usage

 displays the help information.

-It supports three decoding methods:
+It supports 4 decoding methods:

  - CTC decoding
  - HLG decoding
--- a/egs/librispeech/ASR/conformer_ctc/pretrained.py
+++ b/egs/librispeech/ASR/conformer_ctc/pretrained.py
@@ -57,16 +57,14 @@ def get_parser():
    parser.add_argument(
        "--words-file",
        type=str,
-        default="./tmp/icefall_asr_librispeech_conformer_ctc/ \
-        data/lang_bpe/words.txt",
+        required=True,
        help="Path to words.txt",
    )

    parser.add_argument(
        "--HLG",
        type=str,
-        default="./tmp/icefall_asr_librispeech_conformer_ctc/ \
-        data/lang_bpe/HLG.pt",
+        required=True,
        help="Path to HLG.pt.",
    )

@@ -172,8 +170,7 @@ def get_parser():
    parser.add_argument(
        "--lang-dir",
        type=str,
-        default="./tmp/icefall_asr_librispeech_conformer_ctc/ \
-        data/lang_bpe",
+        required=True,
        help="Path to lang bpe dir.",
    )

@@ -302,6 +299,7 @@ def main():
        dtype=torch.int32,
    )

+    try:
        if params.method == "ctc-decoding":
            logging.info("Building CTC topology")
            lexicon = Lexicon(params.lang_dir)
@@ -335,7 +333,11 @@ def main():
            hyps = bpe_model.decode(token_ids)
            hyps = [s.split() for s in hyps]

-    else:
+        if params.method in [
+            "1best",
+            "whole-lattice-rescoring",
+            "attention-decoder",
+        ]:
            logging.info(f"Loading HLG from {params.HLG}")
            HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
            HLG = HLG.to(device)
@@ -343,7 +345,10 @@ def main():
                # For whole-lattice-rescoring and attention-decoder
                HLG.lm_scores = HLG.scores.clone()

-        if params.method in ["whole-lattice-rescoring", "attention-decoder"]:
+            if params.method in [
+                "whole-lattice-rescoring",
+                "attention-decoder",
+            ]:
                logging.info(f"Loading G from {params.G}")
                G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
                # Add epsilon self-loops to G as we will compose
@@ -378,7 +383,9 @@ def main():
                )
                best_path = next(iter(best_path_dict.values()))
            elif params.method == "attention-decoder":
-            logging.info("Use HLG + LM rescoring + attention decoder rescoring")
+                logging.info(
+                    "Use HLG + LM rescoring + attention decoder rescoring"
+                )
                rescored_lattice = rescore_with_whole_lattice(
                    lattice=lattice, G_with_epsilon_loops=G, lm_scale_list=None
                )
@@ -408,6 +415,9 @@ def main():

        logging.info("Decoding Done")

+    except Exception:
+        raise ValueError("Please use a supported decoding method.")
+

 if __name__ == "__main__":
    formatter = (