Add self-loops to propagate disambiguation symbols.

2025-08-09 01:52:41 +00:00 · 2021-07-21 13:12:20 +08:00 · 2021-07-21 13:12:20 +08:00 · a01d08f73c
commit a01d08f73c
parent 8a72901f3a
2 changed files with 66 additions and 9 deletions
--- a/egs/librispeech/ASR/local/prepare_lang.py
+++ b/egs/librispeech/ASR/local/prepare_lang.py
@ -18,15 +18,13 @@ consisting of words and phones and does the following:
        lexicon = k2.Fsa.from_dict(d)
 5. Generate L_disambig.pt, in k2 format.
 6. Generate lexicon_disambig.txt
 """
 import math
 import re
 import sys
 from collections import defaultdict
 from pathlib import Path
-from typing import Dict, List, Tuple
+from typing import Any, Dict, List, Tuple
 import k2
 import torch
@ -90,6 +88,10 @@ def write_lexicon(filename: str, lexicon: Lexicon) -> None:
 def write_mapping(filename: str, sym2id: Dict[str, int]) -> None:
    """Write a symbol to ID mapping to a file.
    Note:
      No need to implement `read_mapping` as it can be done
      through :func:`k2.SymbolTable.from_file`.
    Args:
      filename:
        Filename to save the mapping.
@ -119,7 +121,7 @@ def get_phones(lexicon: Lexicon) -> List[str]:
    return sorted_ans
-def get_words(lexicon: List[Tuple[str, List[str]]]) -> List[str]:
+def get_words(lexicon: Lexicon) -> List[str]:
    """Get words from a lexicon.
    Args:
@ -213,12 +215,46 @@ def generate_id_map(symbols: List[str]) -> Dict[str, int]:
    return {sym: i for i, sym in enumerate(symbols)}
 def add_self_loops(
    arcs: List[List[Any]], disambig_phone: int, disambig_word: int
 ) -> List[List[Any]]:
    """Adds self-loops to states of an FST to propagate disambiguation symbols
    through it. They are added on each state with non-epsilon output symbols
    on at least one arc out of the state.
    See also fstaddselfloops.pl from Kaldi. One difference is that
    Kaldi uses OpenFst style FSTs and it has multiple final states.
    This function uses k2 style FSTs and it does not need to add self-loops
    to the final state.
    Args:
      arcs:
        A list-of-list. The sublist contains
        `[src_state, dest_state, label, aux_label, score]`
    Return:
      Return new `arcs` that contain self-loops.
    """
    states_needs_self_loops = set()
    for arc in arcs:
        src, dst, ilable, olable, score = arc
        if olable != 0:
            states_needs_self_loops.add(src)
    ans = []
    for s in states_needs_self_loops:
        ans.append([s, s, disambig_phone, disambig_word, 0])
    return arcs + ans
 def lexicon_to_fst(
    lexicon: Lexicon,
    phone2id: Dict[str, int],
    word2id: Dict[str, int],
    sil_phone: str = "SIL",
    sil_prob: float = 0.5,
    need_self_loops: bool = False,
 ) -> k2.Fsa:
    """Convert a lexicon to an FST (in k2 format) with optional silence at
    the beginning and end of the word.
@ -235,6 +271,9 @@ def lexicon_to_fst(
      sil_prob:
        The probability for adding a silence at the beginning and end
        of the word.
      need_self_loops:
        If True, add self-loop to states with non-epsilon output symbols
        on at least one arc out of the state.
    Returns:
      Return an instance of `k2.Fsa` representing the given lexicon.
    """
@ -285,6 +324,15 @@ def lexicon_to_fst(
        arcs.append([cur_state, loop_state, prons[i], w, no_sil_score])
        arcs.append([cur_state, sil_state, prons[i], w, sil_score])
    if need_self_loops:
        disambig_phone = phone2id["#0"]
        disambig_word = word2id["#0"]
        arcs = add_self_loops(
            arcs,
            disambig_phone=disambig_phone,
            disambig_word=disambig_word,
        )
    final_state = next_state
    arcs.append([loop_state, final_state, -1, -1, 0])
    arcs.append([final_state])
@ -346,13 +394,10 @@ def main():
        word2id=word2id,
        sil_phone=sil_phone,
        sil_prob=sil_prob,
        need_self_loops=True,
    )
-    # TODO(fangjun): add self-loops to L_disambig
+    if False:
    # whose ilabel is phone2id['#0'] and olable is word2id['#0']
    # Need to implement it in k2
    if True:
        # Just for debugging, will remove it
        torch.save(L.as_dict(), out_dir / "L.pt")
        torch.save(L_disambig.as_dict(), out_dir / "L_disambig.pt")
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@ -75,3 +75,15 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
  mkdir -p data/fbank
  ./local/compute_fbank_musan.py
 fi
 if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
  echo "Stage 5: Prepare phone based lang"
  # TODO: add BPE based lang
  mkdir -p data/lang
  (echo '!SIL SIL'; echo '<SPOKEN_NOISE> SPN'; echo '<UNK> SPN'; ) |
    cat - data/lm/librispeech-lexicon.txt |
    sort | uniq > data/lang/lexicon.txt
  ./local/prepare_lang.py
 fi