diff --git a/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py b/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py index c753392cb..00514e6bb 100755 --- a/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py +++ b/egs/multi_zh_en/ASR/local/prepare_for_bpe_model.py @@ -40,7 +40,7 @@ def get_args(): parser.add_argument( "--text", type=str, - help="WenetSpeech training transcript.", + help="Training transcript.", ) return parser.parse_args() @@ -56,7 +56,7 @@ def main(): transcript_path = lang_dir / "transcript_chars.txt" with open(text, "r", encoding="utf-8") as fin: - with open(transcript_path, "w", encoding="utf-8") as fout: + with open(transcript_path, "w+", encoding="utf-8") as fout: for line in tqdm(fin): fout.write(tokenize_by_CJK_char(line) + "\n")