diff --git a/egs/commonvoice/ASR/zipformer/train_char.py b/egs/commonvoice/ASR/zipformer/train_char.py index 179a0651a..31af7ef8c 100644 --- a/egs/commonvoice/ASR/zipformer/train_char.py +++ b/egs/commonvoice/ASR/zipformer/train_char.py @@ -97,6 +97,7 @@ from icefall.utils import ( get_parameter_groups_with_lrs, setup_logger, str2bool, + tokenize_by_CJK_char, ) LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler] @@ -356,7 +357,7 @@ def compute_loss( batch_idx_train = params.batch_idx_train warm_step = params.warm_step - texts = batch["supervisions"]["text"] + texts = [tokenize_by_CJK_char(text) for text in batch["supervisions"]["text"]] y = graph_compiler.texts_to_ids(texts) y = k2.RaggedTensor(y).to(device)