From dbda1644b54d2c989b15e8e89f095c015416bd8d Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 9 Jun 2022 11:42:18 +0800 Subject: [PATCH] Replace load_manifest_lazy with load_manifest for MUSAN. (#412) --- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 3 ++- egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py | 4 ++-- egs/aishell/ASR/transducer_stateless_modified-2/train.py | 4 ++-- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 3 ++- egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py | 4 ++-- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 4 ++-- egs/librispeech/ASR/pruned_transducer_stateless3/train.py | 4 ++-- egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py | 4 ++-- .../test_asr_datamodule.py | 4 ++-- .../ASR/transducer_stateless_multi_datasets/train.py | 4 ++-- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 4 ++-- egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py | 4 ++-- egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py | 4 ++-- .../ASR/pruned_transducer_stateless2/asr_datamodule.py | 3 ++- 14 files changed, 28 insertions(+), 25 deletions(-) diff --git a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py index 728f7e3d0..6a5b57e24 100644 --- a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -27,6 +27,7 @@ from lhotse import ( CutSet, Fbank, FbankConfig, + load_manifest, load_manifest_lazy, set_caching_enabled, ) @@ -204,7 +205,7 @@ class Aidatatang_200zhAsrDataModule: The state dict for the training sampler. """ logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) diff --git a/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py index e1021fda2..d24ba6bb7 100644 --- a/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/aishell/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -23,7 +23,7 @@ from functools import lru_cache from pathlib import Path from typing import List -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( CutConcatenate, CutMix, @@ -183,7 +183,7 @@ class AishellAsrDataModule: def train_dataloaders(self, cuts_train: CutSet) -> DataLoader: logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) diff --git a/egs/aishell/ASR/transducer_stateless_modified-2/train.py b/egs/aishell/ASR/transducer_stateless_modified-2/train.py index a6c17198f..0975f309a 100755 --- a/egs/aishell/ASR/transducer_stateless_modified-2/train.py +++ b/egs/aishell/ASR/transducer_stateless_modified-2/train.py @@ -56,7 +56,7 @@ from asr_datamodule import AsrDataModule from conformer import Conformer from decoder import Decoder from joiner import Joiner -from lhotse import CutSet, load_manifest_lazy +from lhotse import CutSet, load_manifest from lhotse.cut import Cut from lhotse.utils import fix_random_seed from model import Transducer @@ -735,7 +735,7 @@ def run(rank, world_size, args): train_datatang_cuts = train_datatang_cuts.repeat(times=None) if args.enable_musan: - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( Path(args.manifest_dir) / "musan_cuts.jsonl.gz" ) else: diff --git a/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py index 339612afe..bf6faad7a 100644 --- a/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/alimeeting/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -27,6 +27,7 @@ from lhotse import ( CutSet, Fbank, FbankConfig, + load_manifest, load_manifest_lazy, set_caching_enabled, ) @@ -204,7 +205,7 @@ class AlimeetingAsrDataModule: The state dict for the training sampler. """ logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) diff --git a/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py b/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py index 62b43146a..d78e26240 100644 --- a/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py +++ b/egs/gigaspeech/ASR/conformer_ctc/asr_datamodule.py @@ -20,7 +20,7 @@ import logging from functools import lru_cache from pathlib import Path -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( CutConcatenate, CutMix, @@ -189,7 +189,7 @@ class GigaSpeechAsrDataModule: def train_dataloaders(self, cuts_train: CutSet) -> DataLoader: logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) diff --git a/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py index 19fe7c6a7..c87686e1e 100644 --- a/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/gigaspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -23,7 +23,7 @@ from pathlib import Path from typing import Any, Dict, Optional import torch -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( CutConcatenate, CutMix, @@ -216,7 +216,7 @@ class GigaSpeechAsrDataModule: if self.args.enable_musan: logging.info("Enable MUSAN") logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) transforms.append( diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/train.py b/egs/librispeech/ASR/pruned_transducer_stateless3/train.py index c6c160952..92eae78d1 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/train.py @@ -66,7 +66,7 @@ from conformer import Conformer from decoder import Decoder from gigaspeech import GigaSpeech from joiner import Joiner -from lhotse import CutSet, load_manifest_lazy +from lhotse import CutSet, load_manifest from lhotse.cut import Cut from lhotse.dataset.sampling.base import CutSampler from lhotse.utils import fix_random_seed @@ -968,7 +968,7 @@ def run(rank, world_size, args): train_giga_cuts = train_giga_cuts.repeat(times=None) if args.enable_musan: - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( Path(args.manifest_dir) / "musan_cuts.jsonl.gz" ) else: diff --git a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py index 5cca06169..355ccc99a 100644 --- a/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/librispeech/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -24,7 +24,7 @@ from pathlib import Path from typing import Any, Dict, Optional import torch -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures CutConcatenate, CutMix, @@ -224,7 +224,7 @@ class LibriSpeechAsrDataModule: if self.args.enable_musan: logging.info("Enable MUSAN") logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) transforms.append( diff --git a/egs/librispeech/ASR/transducer_stateless_multi_datasets/test_asr_datamodule.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/test_asr_datamodule.py index 3b51ff9bc..ef51a7811 100755 --- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/test_asr_datamodule.py +++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/test_asr_datamodule.py @@ -28,7 +28,7 @@ from pathlib import Path from asr_datamodule import AsrDataModule from gigaspeech import GigaSpeech -from lhotse import load_manifest_lazy +from lhotse import load_manifest from librispeech import LibriSpeech @@ -41,7 +41,7 @@ def test_dataset(): print(args) if args.enable_musan: - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( Path(args.manifest_dir) / "musan_cuts.jsonl.gz" ) else: diff --git a/egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py index 46404732b..32ce1032c 100755 --- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py +++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py @@ -73,7 +73,7 @@ from conformer import Conformer from decoder import Decoder from gigaspeech import GigaSpeech from joiner import Joiner -from lhotse import CutSet, load_manifest_lazy +from lhotse import CutSet, load_manifest from lhotse.cut import Cut from lhotse.utils import fix_random_seed from librispeech import LibriSpeech @@ -775,7 +775,7 @@ def run(rank, world_size, args): train_giga_cuts = train_giga_cuts.repeat(times=None) if args.enable_musan: - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( Path(args.manifest_dir) / "musan_cuts.jsonl.gz" ) else: diff --git a/egs/spgispeech/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/spgispeech/ASR/pruned_transducer_stateless2/asr_datamodule.py index a674d5527..f165f6e60 100644 --- a/egs/spgispeech/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/spgispeech/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -22,7 +22,7 @@ from pathlib import Path from typing import Any, Dict, Optional import torch -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( CutConcatenate, CutMix, @@ -176,7 +176,7 @@ class SPGISpeechAsrDataModule: The state dict for the training sampler. """ logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "cuts_musan.jsonl.gz" ) diff --git a/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py b/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py index ae22bfd92..51de46ae8 100644 --- a/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py +++ b/egs/tedlium3/ASR/transducer_stateless/asr_datamodule.py @@ -22,7 +22,7 @@ import logging from functools import lru_cache from pathlib import Path -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( CutConcatenate, CutMix, @@ -179,7 +179,7 @@ class TedLiumAsrDataModule: transforms = [] if self.args.enable_musan: logging.info("Enable MUSAN") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" ) transforms.append( diff --git a/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py b/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py index 665b5a771..5e2923fb6 100644 --- a/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py +++ b/egs/timit/ASR/tdnn_lstm_ctc/asr_datamodule.py @@ -23,7 +23,7 @@ from functools import lru_cache from pathlib import Path from typing import List, Union -from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy +from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy from lhotse.dataset import ( CutConcatenate, CutMix, @@ -154,7 +154,7 @@ class TimitAsrDataModule(DataModule): cuts_train = self.train_cuts() logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.feature_dir / "cuts_musan.jsonl.gz" ) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py index 6aebc2164..200a694d6 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/asr_datamodule.py @@ -27,6 +27,7 @@ from lhotse import ( CutSet, Fbank, FbankConfig, + load_manifest, load_manifest_lazy, set_caching_enabled, ) @@ -218,7 +219,7 @@ class WenetSpeechAsrDataModule: The state dict for the training sampler. """ logging.info("About to get Musan cuts") - cuts_musan = load_manifest_lazy( + cuts_musan = load_manifest( self.args.manifest_dir / "musan_cuts.jsonl.gz" )