From 336944c9b305004fc77f254b7677553c93a6494b Mon Sep 17 00:00:00 2001
From: jinzr
Date: Fri, 8 Mar 2024 19:22:31 +0800
Subject: [PATCH] Update preprocess_mdcc.py

---
 egs/mdcc/ASR/local/preprocess_mdcc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/mdcc/ASR/local/preprocess_mdcc.py b/egs/mdcc/ASR/local/preprocess_mdcc.py
index 108a0bb28..f068425e7 100755
--- a/egs/mdcc/ASR/local/preprocess_mdcc.py
+++ b/egs/mdcc/ASR/local/preprocess_mdcc.py
@@ -89,9 +89,9 @@ def get_word_segments(lines: List[str]) -> List[str]:
     for line in tqdm(lines, desc="Segmenting lines"):
         try:
             # code switching
-            if len(line.split(" ")) > 1:
+            if len(line.strip().split(" ")) > 1:
                 segments = []
-                for segment in line.split(" "):
+                for segment in line.strip().split(" "):
                     try:
                         if not is_cjk(segment[0]):  # en segment
                             segments.append(segment)