Add self-loops to propagate disambiguation symbols.

2025-08-08 09:32:20 +00:00 · 2021-07-21 13:12:20 +08:00 · 2021-07-21 13:12:20 +08:00 · a01d08f73c
commit a01d08f73c
parent 8a72901f3a
2 changed files with 66 additions and 9 deletions
--- a/egs/librispeech/ASR/local/prepare_lang.py
+++ b/egs/librispeech/ASR/local/prepare_lang.py
@ -18,15 +18,13 @@ consisting of words and phones and does the following:
        lexicon = k2.Fsa.from_dict(d)

 5. Generate L_disambig.pt, in k2 format.
-
-6. Generate lexicon_disambig.txt
 """
 import math
 import re
 import sys
 from collections import defaultdict
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Any, Dict, List, Tuple

 import k2
 import torch
@ -90,6 +88,10 @@ def write_lexicon(filename: str, lexicon: Lexicon) -> None:
 def write_mapping(filename: str, sym2id: Dict[str, int]) -> None:
    """Write a symbol to ID mapping to a file.

+    Note:
+      No need to implement `read_mapping` as it can be done
+      through :func:`k2.SymbolTable.from_file`.
+
    Args:
      filename:
        Filename to save the mapping.
@ -119,7 +121,7 @@ def get_phones(lexicon: Lexicon) -> List[str]:
    return sorted_ans


-def get_words(lexicon: List[Tuple[str, List[str]]]) -> List[str]:
+def get_words(lexicon: Lexicon) -> List[str]:
    """Get words from a lexicon.

    Args:
@ -213,12 +215,46 @@ def generate_id_map(symbols: List[str]) -> Dict[str, int]:
    return {sym: i for i, sym in enumerate(symbols)}


+def add_self_loops(
+    arcs: List[List[Any]], disambig_phone: int, disambig_word: int
+) -> List[List[Any]]:
+    """Adds self-loops to states of an FST to propagate disambiguation symbols
+    through it. They are added on each state with non-epsilon output symbols
+    on at least one arc out of the state.
+
+    See also fstaddselfloops.pl from Kaldi. One difference is that
+    Kaldi uses OpenFst style FSTs and it has multiple final states.
+    This function uses k2 style FSTs and it does not need to add self-loops
+    to the final state.
+
+    Args:
+      arcs:
+        A list-of-list. The sublist contains
+        `[src_state, dest_state, label, aux_label, score]`
+
+    Return:
+      Return new `arcs` that contain self-loops.
+    """
+    states_needs_self_loops = set()
+    for arc in arcs:
+        src, dst, ilable, olable, score = arc
+        if olable != 0:
+            states_needs_self_loops.add(src)
+
+    ans = []
+    for s in states_needs_self_loops:
+        ans.append([s, s, disambig_phone, disambig_word, 0])
+
+    return arcs + ans
+
+
 def lexicon_to_fst(
    lexicon: Lexicon,
    phone2id: Dict[str, int],
    word2id: Dict[str, int],
    sil_phone: str = "SIL",
    sil_prob: float = 0.5,
+    need_self_loops: bool = False,
 ) -> k2.Fsa:
    """Convert a lexicon to an FST (in k2 format) with optional silence at
    the beginning and end of the word.
@ -235,6 +271,9 @@ def lexicon_to_fst(
      sil_prob:
        The probability for adding a silence at the beginning and end
        of the word.
+      need_self_loops:
+        If True, add self-loop to states with non-epsilon output symbols
+        on at least one arc out of the state.
    Returns:
      Return an instance of `k2.Fsa` representing the given lexicon.
    """
@ -285,6 +324,15 @@ def lexicon_to_fst(
        arcs.append([cur_state, loop_state, prons[i], w, no_sil_score])
        arcs.append([cur_state, sil_state, prons[i], w, sil_score])

+    if need_self_loops:
+        disambig_phone = phone2id["#0"]
+        disambig_word = word2id["#0"]
+        arcs = add_self_loops(
+            arcs,
+            disambig_phone=disambig_phone,
+            disambig_word=disambig_word,
+        )
+
    final_state = next_state
    arcs.append([loop_state, final_state, -1, -1, 0])
    arcs.append([final_state])
@ -346,13 +394,10 @@ def main():
        word2id=word2id,
        sil_phone=sil_phone,
        sil_prob=sil_prob,
+        need_self_loops=True,
    )

-    # TODO(fangjun): add self-loops to L_disambig
-    # whose ilabel is phone2id['#0'] and olable is word2id['#0']
-    # Need to implement it in k2
-
-    if True:
+    if False:
        # Just for debugging, will remove it
        torch.save(L.as_dict(), out_dir / "L.pt")
        torch.save(L_disambig.as_dict(), out_dir / "L_disambig.pt")
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@ -75,3 +75,15 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
  mkdir -p data/fbank
  ./local/compute_fbank_musan.py
 fi
+
+if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
+  echo "Stage 5: Prepare phone based lang"
+  # TODO: add BPE based lang
+  mkdir -p data/lang
+
+  (echo '!SIL SIL'; echo '<SPOKEN_NOISE> SPN'; echo '<UNK> SPN'; ) |
+    cat - data/lm/librispeech-lexicon.txt |
+    sort | uniq > data/lang/lexicon.txt
+
+  ./local/prepare_lang.py
+fi