mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 10:16:14 +00:00
Update preprocess_mdcc.py
This commit is contained in:
parent
8b96efe9e5
commit
336944c9b3
@ -89,9 +89,9 @@ def get_word_segments(lines: List[str]) -> List[str]:
|
|||||||
for line in tqdm(lines, desc="Segmenting lines"):
|
for line in tqdm(lines, desc="Segmenting lines"):
|
||||||
try:
|
try:
|
||||||
# code switching
|
# code switching
|
||||||
if len(line.split(" ")) > 1:
|
if len(line.strip().split(" ")) > 1:
|
||||||
segments = []
|
segments = []
|
||||||
for segment in line.split(" "):
|
for segment in line.strip().split(" "):
|
||||||
try:
|
try:
|
||||||
if not is_cjk(segment[0]): # en segment
|
if not is_cjk(segment[0]): # en segment
|
||||||
segments.append(segment)
|
segments.append(segment)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user