Update prepare_lm_training_data.py

This commit is contained in:
jinzr 2023-11-08 10:20:10 +08:00
parent 7f53f59776
commit 403e2e52ac

View File

@@ -99,9 +99,9 @@ def main():
with open(args.lm_data) as f: with open(args.lm_data) as f:
while True: while True:
line = f.readline() line = f.readline()
line = tokenize_by_CJK_char(line)
if line == "": if line == "":
break break
line = tokenize_by_CJK_char(line)
if step and processed % step == 0: if step and processed % step == 0:
logging.info(f"Processed number of lines: {processed} ") logging.info(f"Processed number of lines: {processed} ")