Fix typos.

This commit is contained in:
Fangjun Kuang 2022-03-07 16:19:06 +08:00
parent 5df6040df0
commit fb63ed627d
2 changed files with 19 additions and 17 deletions

View File

@@ -29,12 +29,12 @@ from model import Transducer
# acoustic frame indexes) and the vertical axis is `u` (representing # acoustic frame indexes) and the vertical axis is `u` (representing
# BPE tokens of the transcript). # BPE tokens of the transcript).
# #
# Beam search is used to find the path that with the # Beam search is used to find the path with the
# highest log probabilities. # highest log probabilities.
# #
# It assumes that the maximum number of symbols that can be # It assumes the maximum number of symbols that can be
# emitted per frame is 1. You can use `--modified-transducer-prob` # emitted per frame is 1. You can use `--modified-transducer-prob`
# from train.py to train a model that satisfy this assumption. # from `./train.py` to train a model that satisfies this assumption.
# AlignItem is a node in the lattice, where its # AlignItem is a node in the lattice, where its
@@ -42,13 +42,13 @@ from model import Transducer
# in the lattice. # in the lattice.
@dataclass @dataclass
class AlignItem: class AlignItem:
# log prob of this # log prob of this item originating from the start item
log_prob: float log_prob: float
# It contains framewise token alignment # It contains framewise token alignment
ys: List[int] ys: List[int]
# It equals to number of non-zero entries in ys # It equals to the number of non-zero entries in ys
pos_u: int pos_u: int
@@ -232,13 +232,13 @@ def force_alignment(
return ans return ans
def get_word_begin_frame( def get_word_starting_frame(
ali: List[int], sp: spm.SentencePieceProcessor ali: List[int], sp: spm.SentencePieceProcessor
) -> List[int]: ) -> List[int]:
"""Get the beginning of each word from the given alignments. """Get the starting frame of each word from the given alignments.
When a word is encoded into BPE tokens, the first token starts When a word is encoded into BPE tokens, the first token starts
with underscore "_", which can be used to identify the beginning with underscore "_", which can be used to identify the starting frame
of a word. of a word.
Args: Args:

View File

@@ -43,7 +43,7 @@ from pathlib import Path
import sentencepiece as spm import sentencepiece as spm
import torch import torch
from alignment import get_word_begin_frame from alignment import get_word_starting_frame
from lhotse import CutSet, load_manifest from lhotse import CutSet, load_manifest
from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler
from lhotse.dataset.collation import collate_custom_field from lhotse.dataset.collation import collate_custom_field
@@ -121,7 +121,7 @@ def main():
# key: cut.id # key: cut.id
# value: a list of pairs (word, time_in_second) # value: a list of pairs (word, time_in_second)
word_begin_time_dict = {} word_starting_time_dict = {}
for batch in dl: for batch in dl:
supervisions = batch["supervisions"] supervisions = batch["supervisions"]
cuts = supervisions["cut"] cuts = supervisions["cut"]
@@ -135,23 +135,25 @@ def main():
(cuts[i].features.num_frames - 1) // 2 - 1 (cuts[i].features.num_frames - 1) // 2 - 1
) // 2 == token_alignment_length[i] ) // 2 == token_alignment_length[i]
word_begin_frame = get_word_begin_frame( word_starting_frame = get_word_starting_frame(
token_alignment[i, : token_alignment_length[i]].tolist(), sp=sp token_alignment[i, : token_alignment_length[i]].tolist(), sp=sp
) )
word_begin_time = [ word_starting_time = [
"{:.2f}".format(i * frame_shift_in_second) "{:.2f}".format(i * frame_shift_in_second)
for i in word_begin_frame for i in word_starting_frame
] ]
words = supervisions["text"][i].split() words = supervisions["text"][i].split()
assert len(word_begin_frame) == len(words) assert len(word_starting_frame) == len(words)
word_begin_time_dict[cuts[i].id] = list(zip(words, word_begin_time)) word_starting_time_dict[cuts[i].id] = list(
zip(words, word_starting_time)
)
# This is a demo script and we exit here after processing # This is a demo script and we exit here after processing
# one batch. # one batch.
# You can find word starting time in the dict "word_begin_time_dict" # You can find word starting time in the dict "word_starting_time_dict"
for cut_id, word_time in word_begin_time_dict.items(): for cut_id, word_time in word_starting_time_dict.items():
print(f"{cut_id}\n{word_time}\n") print(f"{cut_id}\n{word_time}\n")
break break