diff --git a/egs/aishell/ASR/local/compute_fbank_musan.py b/egs/aishell/ASR/local/compute_fbank_musan.py
index 0a5d82ccb..aeffc1754 100755
--- a/egs/aishell/ASR/local/compute_fbank_musan.py
+++ b/egs/aishell/ASR/local/compute_fbank_musan.py
@@ -108,4 +108,3 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
     args = get_args()
     compute_fbank_musan(num_mel_bins=args.num_mel_bins)
-
diff --git a/egs/aishell/ASR/local/prepare_char.py b/egs/aishell/ASR/local/prepare_char.py
index 421b1c3d6..e0d4a1976 100755
--- a/egs/aishell/ASR/local/prepare_char.py
+++ b/egs/aishell/ASR/local/prepare_char.py
@@ -86,7 +86,8 @@ def lexicon_to_fst_no_sil(
         cur_state = loop_state
 
         word = word2id[word]
-        pieces = [token2id[i] if i in token2id else token2id["<unk>"] for i in pieces]
+        pieces = [token2id[i] if i in token2id else token2id["<unk>"]
+                  for i in pieces]
 
         for i in range(len(pieces) - 1):
             w = word if i == 0 else eps
@@ -135,7 +136,7 @@ def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool:
       otherwise False.
     """
     for tok in tokens:
-        if not tok in token_sym_table:
+        if tok not in token_sym_table:
             return True
     return False
 
@@ -186,7 +187,7 @@ def generate_tokens(text_file: str) -> Dict[str, int]:
             line = re.sub(whitespace, "", line)
             chars = list(line)
             for char in chars:
-                if not char in tokens:
+                if char not in tokens:
                     tokens[char] = len(tokens)
     return tokens
 
diff --git a/icefall/char_graph_compiler.py b/icefall/char_graph_compiler.py
index 9ce948f7c..ddc7ec1f7 100644
--- a/icefall/char_graph_compiler.py
+++ b/icefall/char_graph_compiler.py
@@ -54,7 +54,6 @@ class CharCtcTrainingGraphCompiler(object):
         self.sos_id = self.token_table[sos_token]
         self.eos_id = self.token_table[eos_token]
 
-
     def texts_to_ids(self, texts: List[str]) -> List[List[int]]:
         """Convert a list of texts to a list-of-list of token IDs.
 
@@ -71,12 +70,11 @@ class CharCtcTrainingGraphCompiler(object):
         whitespace = re.compile(r"([ \t])")
         for text in texts:
             text = re.sub(whitespace, "", text)
-            sub_ids = [self.token_table[txt] if txt in self.token_table \
+            sub_ids = [self.token_table[txt] if txt in self.token_table
                        else self.oov_id for txt in text]
             ids.append(sub_ids)
         return ids
 
-
     def compile(
         self,
         token_ids: List[List[int]],
@@ -95,4 +93,3 @@ class CharCtcTrainingGraphCompiler(object):
           piece IDs.
         """
         return k2.ctc_graph(token_ids, modified=modified, device=self.device)
-
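The hunks in prepare_char.py and char_graph_compiler.py all touch the same character-to-id lookup with an OOV fallback (`x not in table` membership tests and a comprehension that maps unknown characters to the "<unk>" id). Below is a minimal, hypothetical sketch of that pattern, not code from the patch: the `token2id` contents and the `text_to_ids` name are illustrative (the real method is `CharCtcTrainingGraphCompiler.texts_to_ids`, which uses `self.oov_id`), while `contain_oov` mirrors the function patched above.

from typing import Dict, List

# Hypothetical token table; real tables are built from the training text.
token2id: Dict[str, int] = {"<unk>": 0, "你": 1, "好": 2}


def contain_oov(token_sym_table: Dict[str, int], tokens: List[str]) -> bool:
    """Return True if any token is missing from the symbol table."""
    for tok in tokens:
        if tok not in token_sym_table:  # idiomatic form used in the patch
            return True
    return False


def text_to_ids(text: str) -> List[int]:
    """Map each character to its id, falling back to "<unk>" for OOVs."""
    return [token2id[c] if c in token2id else token2id["<unk>"]
            for c in text]


print(contain_oov(token2id, list("你好吗")))  # True: "吗" is out of vocabulary
print(text_to_ids("你好吗"))  # [1, 2, 0]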