train_bpe_model: --unk-surface option

When --unk-surface is set, its value is passed to SentencePiece training as unk_surface.
This commit is contained in:
Peter Ross 2023-06-08 16:48:03 +10:00
parent 3ae47a4940
commit 7cdc0da339

View File

@ -54,6 +54,12 @@ def get_args():
help="Vocabulary size for BPE training", help="Vocabulary size for BPE training",
) )
parser.add_argument(
"--unk-surface",
type=str,
help="Unknown symbol surface",
)
return parser.parse_args() return parser.parse_args()
@ -88,6 +94,7 @@ def main():
unk_id=unk_id, unk_id=unk_id,
bos_id=-1, bos_id=-1,
eos_id=-1, eos_id=-1,
**(dict(unk_surface=args.unk_surface) if args.unk_surface else dict()),
) )
else: else:
print(f"{model_file} exists - skipping") print(f"{model_file} exists - skipping")