From eaaab475090e23b7413c7930c2f9ae7eef136f2d Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Tue, 1 Jul 2025 17:20:27 +0800
Subject: [PATCH] Fix for asr_datamodule.py

---
 egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py
index 283252a46..2dcf090ad 100644
--- a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py
+++ b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py
@@ -29,7 +29,6 @@ from lhotse.dataset import (  # noqa F401 for PrecomputedFeatures
     CutConcatenate,
     CutMix,
     DynamicBucketingSampler,
-    K2SpeechRecognitionDataset,
     PrecomputedFeatures,
     SimpleCutSampler,
     SpecAugment,
@@ -39,6 +38,7 @@ from lhotse.dataset.input_strategies import (  # noqa F401 For AudioSamples
     OnTheFlyFeatures,
 )
 from lhotse.utils import fix_random_seed
+from speech_recognition import K2SpeechRecognitionDataset
 from torch.utils.data import DataLoader

 from icefall.utils import str2bool
@@ -232,8 +232,11 @@ class LibriSpeechAsrDataModule:
             logging.info("Enable MUSAN")
             logging.info("About to get Musan cuts")
             cuts_musan = load_manifest(self.args.manifest_dir / "musan_cuts.jsonl.gz")
+
+            # We use probability 1.0 here so that musan augmentation is
+            # always performed
             transforms.append(
-                CutMix(cuts=cuts_musan, p=0.5, snr=(10, 20), preserve_id=True)
+                CutMix(cuts=cuts_musan, p=1.0, snr=(10, 20), preserve_id=True)
             )
         else:
             logging.info("Disable MUSAN")