diff --git a/egs/mdcc/ASR/local/preprocess_mdcc.py b/egs/mdcc/ASR/local/preprocess_mdcc.py
index 2e2115de5..436bcad11 100755
--- a/egs/mdcc/ASR/local/preprocess_mdcc.py
+++ b/egs/mdcc/ASR/local/preprocess_mdcc.py
@@ -118,7 +118,7 @@ def get_word_segments(lines: List[str]) -> List[str]:
 def get_words(lines: List[str]) -> List[str]:
     words = set()
     for line in tqdm(lines, desc="Getting words"):
-        words.update(line.strip().split(" "))
+        words.update(line.replace("\n", "").split(" "))
     return list(words)