Merge 7cdc0da3391e5c3cdd8b7ee9a9db52a7a1d6e641 into 34fc1fdf0d8ff520e2bb18267d046ca207c78ef9

This commit is contained in:
Peter Ross 2025-08-17 06:14:23 +00:00 committed by GitHub
commit 0eaf8e4f28
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -55,6 +55,12 @@ def get_args():
help="Vocabulary size for BPE training",
)
parser.add_argument(
"--unk-surface",
type=str,
help="Unknown symbol surface",
)
return parser.parse_args()
@@ -101,6 +107,7 @@ def main():
unk_id=unk_id,
bos_id=-1,
eos_id=-1,
**(dict(unk_surface=args.unk_surface) if args.unk_surface else dict()),
)
else:
print(f"{model_file} exists - skipping")