Merge 7cdc0da3391e5c3cdd8b7ee9a9db52a7a1d6e641 into abd9437e6d5419a497707748eb935e50976c3b7b

This commit is contained in:
Peter Ross 2025-06-27 11:33:23 +00:00 committed by GitHub
commit d07ad97691
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -55,6 +55,12 @@ def get_args():
help="Vocabulary size for BPE training",
)
parser.add_argument(
"--unk-surface",
type=str,
help="Unknown symbol surface",
)
return parser.parse_args()
@@ -101,6 +107,7 @@ def main():
unk_id=unk_id,
bos_id=-1,
eos_id=-1,
**(dict(unk_surface=args.unk_surface) if args.unk_surface else dict()),
)
else:
print(f"{model_file} exists - skipping")