From 7cdc0da3391e5c3cdd8b7ee9a9db52a7a1d6e641 Mon Sep 17 00:00:00 2001 From: Peter Ross Date: Thu, 8 Jun 2023 16:48:03 +1000 Subject: [PATCH] train_bpe_model: add --unk-surface option. When the option is set, pass its value to sentencepiece train. --- egs/librispeech/ASR/local/train_bpe_model.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/egs/librispeech/ASR/local/train_bpe_model.py b/egs/librispeech/ASR/local/train_bpe_model.py index 43142aee4..642f1384a 100755 --- a/egs/librispeech/ASR/local/train_bpe_model.py +++ b/egs/librispeech/ASR/local/train_bpe_model.py @@ -54,6 +54,12 @@ def get_args(): help="Vocabulary size for BPE training", ) + parser.add_argument( + "--unk-surface", + type=str, + help="Unknown symbol surface", + ) + return parser.parse_args() @@ -88,6 +94,7 @@ def main(): unk_id=unk_id, bos_id=-1, eos_id=-1, + **(dict(unk_surface=args.unk_surface) if args.unk_surface else dict()), ) else: print(f"{model_file} exists - skipping")