From fb63ed627d185a8d936c45b5570140baee0d24b0 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Mon, 7 Mar 2022 16:19:06 +0800
Subject: [PATCH] Fix typos.

---
 .../ASR/transducer_stateless/alignment.py     | 16 +++++++--------
 .../transducer_stateless/test_compute_ali.py  | 20 ++++++++++---------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/egs/librispeech/ASR/transducer_stateless/alignment.py b/egs/librispeech/ASR/transducer_stateless/alignment.py
index 492a6fc51..a1101afe4 100644
--- a/egs/librispeech/ASR/transducer_stateless/alignment.py
+++ b/egs/librispeech/ASR/transducer_stateless/alignment.py
@@ -29,12 +29,12 @@ from model import Transducer
 # acoustic frame indexes) and the vertical axis is `u` (representing
 # BPE tokens of the transcript).
 #
-# Beam search is used to find the path that with the
+# Beam search is used to find the path with the
 # highest log probabilities.
 #
-# It assumes that the maximum number of symbols that can be
+# It assumes the maximum number of symbols that can be
 # emitted per frame is 1. You can use `--modified-transducer-prob`
-# from train.py to train a model that satisfy this assumption.
+# from `./train.py` to train a model that satisfies this assumption.
 
 
 # AlignItem is a node in the lattice, where its
@@ -42,13 +42,13 @@ from model import Transducer
 # in the lattice.
 @dataclass
 class AlignItem:
-    # log prob of this
+    # log prob of this item originating from the start item
     log_prob: float
 
     # It contains framewise token alignment
     ys: List[int]
 
-    # It equals to number of non-zero entries in ys
+    # It equals the number of non-zero entries in ys
     pos_u: int
 
 
@@ -232,13 +232,13 @@ def force_alignment(
     return ans
 
 
-def get_word_begin_frame(
+def get_word_starting_frame(
     ali: List[int], sp: spm.SentencePieceProcessor
 ) -> List[int]:
-    """Get the beginning of each word from the given alignments.
+    """Get the starting frame of each word from the given alignments.
 
     When a word is encoded into BPE tokens, the first token starts
-    with underscore "_", which can be used to identify the beginning
+    with underscore "_", which can be used to identify the starting frame
     of a word.
 
     Args:
diff --git a/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py b/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py
index ffb270ae7..fed3c121c 100755
--- a/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py
+++ b/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py
@@ -43,7 +43,7 @@ from pathlib import Path
 
 import sentencepiece as spm
 import torch
-from alignment import get_word_begin_frame
+from alignment import get_word_starting_frame
 from lhotse import CutSet, load_manifest
 from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler
 from lhotse.dataset.collation import collate_custom_field
@@ -121,7 +121,7 @@ def main():
 
     # key: cut.id
     # value: a list of pairs (word, time_in_second)
-    word_begin_time_dict = {}
+    word_starting_time_dict = {}
     for batch in dl:
         supervisions = batch["supervisions"]
         cuts = supervisions["cut"]
@@ -135,23 +135,25 @@ def main():
                 (cuts[i].features.num_frames - 1) // 2 - 1
             ) // 2 == token_alignment_length[i]
 
-            word_begin_frame = get_word_begin_frame(
+            word_starting_frame = get_word_starting_frame(
                 token_alignment[i, : token_alignment_length[i]].tolist(), sp=sp
             )
-            word_begin_time = [
+            word_starting_time = [
                 "{:.2f}".format(i * frame_shift_in_second)
-                for i in word_begin_frame
+                for i in word_starting_frame
             ]
 
             words = supervisions["text"][i].split()
 
-            assert len(word_begin_frame) == len(words)
-            word_begin_time_dict[cuts[i].id] = list(zip(words, word_begin_time))
+            assert len(word_starting_frame) == len(words)
+            word_starting_time_dict[cuts[i].id] = list(
+                zip(words, word_starting_time)
+            )
 
         # This is a demo script and we exit here after processing
         # one batch.
-        # You can find word starting time in the dict "word_begin_time_dict"
-        for cut_id, word_time in word_begin_time_dict.items():
+        # You can find word starting time in the dict "word_starting_time_dict"
+        for cut_id, word_time in word_starting_time_dict.items():
             print(f"{cut_id}\n{word_time}\n")
         break