From 9bf88ac3b194ddb728993f8955f389f88140c1a1 Mon Sep 17 00:00:00 2001 From: jinzr Date: Wed, 13 Mar 2024 12:01:34 +0800 Subject: [PATCH] Update train_char.py --- egs/commonvoice/ASR/zipformer/train_char.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/egs/commonvoice/ASR/zipformer/train_char.py b/egs/commonvoice/ASR/zipformer/train_char.py index 179a0651a..31af7ef8c 100644 --- a/egs/commonvoice/ASR/zipformer/train_char.py +++ b/egs/commonvoice/ASR/zipformer/train_char.py @@ -97,6 +97,7 @@ from icefall.utils import ( get_parameter_groups_with_lrs, setup_logger, str2bool, + tokenize_by_CJK_char, ) LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] @@ -356,7 +357,7 @@ def compute_loss( batch_idx_train = params.batch_idx_train warm_step = params.warm_step - texts = batch["supervisions"]["text"] + texts = [tokenize_by_CJK_char(text) for text in batch["supervisions"]["text"]] y = graph_compiler.texts_to_ids(texts) y = k2.RaggedTensor(y).to(device)