Update preprocess_mdcc.py

This commit is contained in:
jinzr 2024-03-08 19:32:07 +08:00
parent 27c9a90dc6
commit 123cd9cb6d

View File

@@ -118,7 +118,7 @@ def get_word_segments(lines: List[str]) -> List[str]:
def get_words(lines: List[str]) -> List[str]:
    """Collect the distinct space-separated tokens found in ``lines``.

    Newline characters are removed from each line and the remainder is
    split on the single space character (not on arbitrary whitespace).
    As a consequence, leading, trailing, or repeated spaces -- as well as
    blank lines -- contribute an empty-string token to the result.

    Args:
        lines: Text lines, with or without trailing newlines.

    Returns:
        The unique tokens as a list, in arbitrary (set-iteration) order.
    """
    words = set()
    for line in tqdm(lines, desc="Getting words"):
        # Remove only newlines instead of calling strip(), so any other
        # leading/trailing characters on the line are kept as they are.
        words.update(line.replace("\n", "").split(" "))
    return list(words)