Check whether the bpe model exists.

This commit is contained in:
Yaguang Hu 2023-09-27 14:54:17 +08:00
parent 2318c3fbd0
commit 8f32a31331

View File

@@ -15,7 +15,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# You can install sentencepiece via: # You can install sentencepiece via:
# #
# pip install sentencepiece # pip install sentencepiece
@@ -74,6 +73,11 @@ def main():
model_type = "unigram" model_type = "unigram"
model_prefix = f"{lang_dir}/{model_type}_{vocab_size}" model_prefix = f"{lang_dir}/{model_type}_{vocab_size}"
model_file = Path(model_prefix + ".model")
if model_file.is_file():
print(f"{model_file} exists - skipping")
return
character_coverage = 1.0 character_coverage = 1.0
input_sentence_size = 100000000 input_sentence_size = 100000000
@@ -88,8 +92,6 @@ def main():
_convert_to_bchar(args.transcript, train_text) _convert_to_bchar(args.transcript, train_text)
model_file = Path(model_prefix + ".model")
if not model_file.is_file():
spm.SentencePieceTrainer.train( spm.SentencePieceTrainer.train(
input=train_text, input=train_text,
vocab_size=vocab_size, vocab_size=vocab_size,
@@ -102,9 +104,6 @@ def main():
bos_id=-1, bos_id=-1,
eos_id=-1, eos_id=-1,
) )
else:
print(f"{model_file} exists - skipping")
return
shutil.copyfile(model_file, f"{lang_dir}/bbpe.model") shutil.copyfile(model_file, f"{lang_dir}/bbpe.model")