Update preprocess_mdcc.py

This commit is contained in:
jinzr 2024-03-08 19:25:13 +08:00
parent 336944c9b3
commit 7d22fef6f2

View File

@@ -92,6 +92,8 @@ def get_word_segments(lines: List[str]) -> List[str]:
if len(line.strip().split(" ")) > 1:
segments = []
for segment in line.strip().split(" "):
if segment.strip() == "":
continue
try:
if not is_cjk(segment[0]): # en segment
segments.append(segment)