From d15605f660c91c9a8dacb9ecbd8d68f3ac110ecf Mon Sep 17 00:00:00 2001 From: jinzr Date: Thu, 23 Nov 2023 11:20:37 +0800 Subject: [PATCH] Update prepare_for_bpe_model.py --- egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py b/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py index c753392cb..00514e6bb 100755 --- a/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py +++ b/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py @@ -40,7 +40,7 @@ def get_args(): parser.add_argument( "--text", type=str, - help="WenetSpeech training transcript.", + help="Training transcript.", ) return parser.parse_args() @@ -56,7 +56,7 @@ def main(): transcript_path = lang_dir / "transcript_chars.txt" with open(text, "r", encoding="utf-8") as fin: - with open(transcript_path, "w", encoding="utf-8") as fout: + with open(transcript_path, "w+", encoding="utf-8") as fout: for line in tqdm(fin): fout.write(tokenize_by_CJK_char(line) + "\n")