Merge 7cdc0da3391e5c3cdd8b7ee9a9db52a7a1d6e641 into abd9437e6d5419a497707748eb935e50976c3b7b

This commit is contained in:
Peter Ross 2025-06-27 11:33:23 +00:00 committed by GitHub
commit d07ad97691
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -55,6 +55,12 @@ def get_args():
help="Vocabulary size for BPE training",
)
parser.add_argument(
"--unk-surface",
type=str,
help="Unknown symbol surface",
)
return parser.parse_args()
@@ -101,6 +107,7 @@ def main():
unk_id=unk_id,
bos_id=-1,
eos_id=-1,
**(dict(unk_surface=args.unk_surface) if args.unk_surface else dict()),
)
else:
print(f"{model_file} exists - skipping")