From 636121c507c4e0de7e47f76fcd781944492d558a Mon Sep 17 00:00:00 2001
From: Bailey Hirota
Date: Wed, 14 May 2025 08:37:44 +0900
Subject: [PATCH] remove bilingual tag from train.py

---
Review notes (text in this section is ignored by `git am`):
- The functional change is in the last hunk: remove_short_and_long_utt()
  now excludes cuts longer than 30 seconds in addition to cuts shorter
  than 1 second.
- The first two hunks show the same text on their `-` and `+` lines as
  rendered here, so they do not change program behaviour.

 egs/multi_ja_en/ASR/zipformer/train.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/egs/multi_ja_en/ASR/zipformer/train.py b/egs/multi_ja_en/ASR/zipformer/train.py
index 00d238cc5..c05d5edc3 100755
--- a/egs/multi_ja_en/ASR/zipformer/train.py
+++ b/egs/multi_ja_en/ASR/zipformer/train.py
@@ -791,7 +791,7 @@ def compute_loss(
     warm_step = params.warm_step
 
     texts = batch["supervisions"]["text"]
-    y = sentencepiece_processor.encode(texts, out_type=int)
+    y = sentencepiece_processor.encode(texts, out_type=int)
     y = k2.RaggedTensor(y)
 
     with torch.set_grad_enabled(is_training):
@@ -1120,7 +1120,7 @@ def run(rank, world_size, args):
 
     # <blk> is defined in local/prepare_lang_char.py
     params.blank_id = sentencepiece_processor.piece_to_id("<blk>")
-    params.vocab_size = sentencepiece_processor.get_piece_size()
+    params.vocab_size = sentencepiece_processor.get_piece_size()
 
     if not params.use_transducer:
         params.ctc_loss_scale = 1.0
@@ -1185,12 +1185,15 @@ def run(rank, world_size, args):
     train_cuts = multi_dataset.train_cuts()
 
     def remove_short_and_long_utt(c: Cut):
-        # Keep only utterances greater than 1 second
+        # Keep only utterances with duration between 1 second and 30 seconds
+        #
+        # Caution: There is a reason to select 30.0 here. Please see
+        # ../local/display_manifest_statistics.py
         #
         # You should use ../local/display_manifest_statistics.py to get
         # an utterance duration distribution for your dataset to select
-        # the threshold as this is dependent on which datasets you choose
-        if c.duration < 1.0:
+        # the threshold
+        if c.duration < 1.0 or c.duration > 30.0:
             logging.warning(
                 f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
             )