Check whether the bbpe model exists in advance. (#1277)

2025-12-11 06:55:27 +00:00 · 2023-09-27 17:35:26 +08:00 · 2023-09-27 17:35:26 +08:00 · 8181d19860
commit 8181d19860
parent a5ba1133c4
1 changed files with 18 additions and 19 deletions
--- a/egs/aishell/ASR/local/train_bbpe_model.py
+++ b/egs/aishell/ASR/local/train_bbpe_model.py
@@ -15,7 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # You can install sentencepiece via:
 #
 #  pip install sentencepiece
@@ -26,12 +25,12 @@
 # Please install a version >=0.1.96
 import argparse
 import re
 import shutil
 import tempfile
 from pathlib import Path
 import sentencepiece as spm
 from icefall import byte_encode, tokenize_by_CJK_char
@@ -74,6 +73,11 @@ def main():
    model_type = "unigram"
    model_prefix = f"{lang_dir}/{model_type}_{vocab_size}"
    model_file = Path(model_prefix + ".model")
    if model_file.is_file():
        print(f"{model_file} exists - skipping")
        return
    character_coverage = 1.0
    input_sentence_size = 100000000
@@ -88,8 +92,6 @@ def main():
    _convert_to_bchar(args.transcript, train_text)
    model_file = Path(model_prefix + ".model")
    if not model_file.is_file():
    spm.SentencePieceTrainer.train(
        input=train_text,
        vocab_size=vocab_size,
@@ -102,9 +104,6 @@ def main():
        bos_id=-1,
        eos_id=-1,
    )
    else:
        print(f"{model_file} exists - skipping")
        return
    shutil.copyfile(model_file, f"{lang_dir}/bbpe.model")