mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
Add self-loops to propagate disambiguation symbols.
This commit is contained in:
parent
8a72901f3a
commit
a01d08f73c
@ -18,15 +18,13 @@ consisting of words and phones and does the following:
|
|||||||
lexicon = k2.Fsa.from_dict(d)
|
lexicon = k2.Fsa.from_dict(d)
|
||||||
|
|
||||||
5. Generate L_disambig.pt, in k2 format.
|
5. Generate L_disambig.pt, in k2 format.
|
||||||
|
|
||||||
6. Generate lexicon_disambig.txt
|
|
||||||
"""
|
"""
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Tuple
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
import k2
|
import k2
|
||||||
import torch
|
import torch
|
||||||
@ -90,6 +88,10 @@ def write_lexicon(filename: str, lexicon: Lexicon) -> None:
|
|||||||
def write_mapping(filename: str, sym2id: Dict[str, int]) -> None:
|
def write_mapping(filename: str, sym2id: Dict[str, int]) -> None:
|
||||||
"""Write a symbol to ID mapping to a file.
|
"""Write a symbol to ID mapping to a file.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
No need to implement `read_mapping` as it can be done
|
||||||
|
through :func:`k2.SymbolTable.from_file`.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filename:
|
filename:
|
||||||
Filename to save the mapping.
|
Filename to save the mapping.
|
||||||
@ -119,7 +121,7 @@ def get_phones(lexicon: Lexicon) -> List[str]:
|
|||||||
return sorted_ans
|
return sorted_ans
|
||||||
|
|
||||||
|
|
||||||
def get_words(lexicon: List[Tuple[str, List[str]]]) -> List[str]:
|
def get_words(lexicon: Lexicon) -> List[str]:
|
||||||
"""Get words from a lexicon.
|
"""Get words from a lexicon.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -213,12 +215,46 @@ def generate_id_map(symbols: List[str]) -> Dict[str, int]:
|
|||||||
return {sym: i for i, sym in enumerate(symbols)}
|
return {sym: i for i, sym in enumerate(symbols)}
|
||||||
|
|
||||||
|
|
||||||
|
def add_self_loops(
    arcs: List[List[Any]], disambig_phone: int, disambig_word: int
) -> List[List[Any]]:
    """Adds self-loops to states of an FST to propagate disambiguation symbols
    through it. They are added on each state with non-epsilon output symbols
    on at least one arc out of the state.

    See also fstaddselfloops.pl from Kaldi. One difference is that
    Kaldi uses OpenFst style FSTs and it has multiple final states.
    This function uses k2 style FSTs and it does not need to add self-loops
    to the final state.

    Args:
      arcs:
        A list-of-list. The sublist contains
        `[src_state, dest_state, label, aux_label, score]`
      disambig_phone:
        The label (input symbol) to put on every added self-loop.
      disambig_word:
        The aux_label (output symbol) to put on every added self-loop.

    Return:
      Return new `arcs` that contain self-loops. The input list is left
      untouched; the self-loops are appended after the original arcs,
      ordered by state so that the result is reproducible across runs.
    """
    states_needing_self_loops = set()
    for arc in arcs:
        # Full unpacking is kept on purpose: an entry that does not have
        # exactly 5 fields raises ValueError instead of being silently
        # accepted.
        src, _dst, _ilabel, olabel, _score = arc
        # 0 is the epsilon symbol; only states that emit a real output
        # symbol on some outgoing arc get a self-loop.
        if olabel != 0:
            states_needing_self_loops.add(src)

    # Iterating a set directly gives no ordering guarantee; sort the states
    # so the generated arcs are deterministic.
    self_loops = [
        [state, state, disambig_phone, disambig_word, 0]
        for state in sorted(states_needing_self_loops)
    ]

    return arcs + self_loops
|
||||||
|
|
||||||
|
|
||||||
def lexicon_to_fst(
|
def lexicon_to_fst(
|
||||||
lexicon: Lexicon,
|
lexicon: Lexicon,
|
||||||
phone2id: Dict[str, int],
|
phone2id: Dict[str, int],
|
||||||
word2id: Dict[str, int],
|
word2id: Dict[str, int],
|
||||||
sil_phone: str = "SIL",
|
sil_phone: str = "SIL",
|
||||||
sil_prob: float = 0.5,
|
sil_prob: float = 0.5,
|
||||||
|
need_self_loops: bool = False,
|
||||||
) -> k2.Fsa:
|
) -> k2.Fsa:
|
||||||
"""Convert a lexicon to an FST (in k2 format) with optional silence at
|
"""Convert a lexicon to an FST (in k2 format) with optional silence at
|
||||||
the beginning and end of the word.
|
the beginning and end of the word.
|
||||||
@ -235,6 +271,9 @@ def lexicon_to_fst(
|
|||||||
sil_prob:
|
sil_prob:
|
||||||
The probability for adding a silence at the beginning and end
|
The probability for adding a silence at the beginning and end
|
||||||
of the word.
|
of the word.
|
||||||
|
need_self_loops:
|
||||||
|
If True, add self-loop to states with non-epsilon output symbols
|
||||||
|
on at least one arc out of the state.
|
||||||
Returns:
|
Returns:
|
||||||
Return an instance of `k2.Fsa` representing the given lexicon.
|
Return an instance of `k2.Fsa` representing the given lexicon.
|
||||||
"""
|
"""
|
||||||
@ -285,6 +324,15 @@ def lexicon_to_fst(
|
|||||||
arcs.append([cur_state, loop_state, prons[i], w, no_sil_score])
|
arcs.append([cur_state, loop_state, prons[i], w, no_sil_score])
|
||||||
arcs.append([cur_state, sil_state, prons[i], w, sil_score])
|
arcs.append([cur_state, sil_state, prons[i], w, sil_score])
|
||||||
|
|
||||||
|
if need_self_loops:
|
||||||
|
disambig_phone = phone2id["#0"]
|
||||||
|
disambig_word = word2id["#0"]
|
||||||
|
arcs = add_self_loops(
|
||||||
|
arcs,
|
||||||
|
disambig_phone=disambig_phone,
|
||||||
|
disambig_word=disambig_word,
|
||||||
|
)
|
||||||
|
|
||||||
final_state = next_state
|
final_state = next_state
|
||||||
arcs.append([loop_state, final_state, -1, -1, 0])
|
arcs.append([loop_state, final_state, -1, -1, 0])
|
||||||
arcs.append([final_state])
|
arcs.append([final_state])
|
||||||
@ -346,13 +394,10 @@ def main():
|
|||||||
word2id=word2id,
|
word2id=word2id,
|
||||||
sil_phone=sil_phone,
|
sil_phone=sil_phone,
|
||||||
sil_prob=sil_prob,
|
sil_prob=sil_prob,
|
||||||
|
need_self_loops=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO(fangjun): add self-loops to L_disambig
|
if False:
|
||||||
# whose ilabel is phone2id['#0'] and olable is word2id['#0']
|
|
||||||
# Need to implement it in k2
|
|
||||||
|
|
||||||
if True:
|
|
||||||
# Just for debugging, will remove it
|
# Just for debugging, will remove it
|
||||||
torch.save(L.as_dict(), out_dir / "L.pt")
|
torch.save(L.as_dict(), out_dir / "L.pt")
|
||||||
torch.save(L_disambig.as_dict(), out_dir / "L_disambig.pt")
|
torch.save(L_disambig.as_dict(), out_dir / "L_disambig.pt")
|
||||||
|
@ -75,3 +75,15 @@ if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
|
|||||||
mkdir -p data/fbank
|
mkdir -p data/fbank
|
||||||
./local/compute_fbank_musan.py
|
./local/compute_fbank_musan.py
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
  echo "Stage 5: Prepare phone based lang"
  # TODO: add BPE based lang
  mkdir -p data/lang

  # Build the lexicon: three special entries (silence, spoken noise and
  # unknown word) followed by the LibriSpeech lexicon, sorted with
  # duplicate lines removed.
  {
    echo '!SIL SIL'
    echo '<SPOKEN_NOISE> SPN'
    echo '<UNK> SPN'
    cat data/lm/librispeech-lexicon.txt
  } | sort -u > data/lang/lexicon.txt

  # Run the language-directory preparation script on the lexicon above.
  ./local/prepare_lang.py
fi
|
||||||
|
Loading…
x
Reference in New Issue
Block a user