Replace load_manifest_lazy with load_manifest for MUSAN. (#412)

This commit is contained in:
Fangjun Kuang 2022-06-09 11:42:18 +08:00 committed by GitHub
parent ed66877694
commit dbda1644b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 28 additions and 25 deletions

View File

@@ -27,6 +27,7 @@ from lhotse import (
CutSet, CutSet,
Fbank, Fbank,
FbankConfig, FbankConfig,
load_manifest,
load_manifest_lazy, load_manifest_lazy,
set_caching_enabled, set_caching_enabled,
) )
@@ -204,7 +205,7 @@ class Aidatatang_200zhAsrDataModule:
The state dict for the training sampler. The state dict for the training sampler.
""" """
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )

View File

@@ -23,7 +23,7 @@ from functools import lru_cache
from pathlib import Path from pathlib import Path
from typing import List from typing import List
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( from lhotse.dataset import (
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -183,7 +183,7 @@ class AishellAsrDataModule:
def train_dataloaders(self, cuts_train: CutSet) -> DataLoader: def train_dataloaders(self, cuts_train: CutSet) -> DataLoader:
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )

View File

@@ -56,7 +56,7 @@ from asr_datamodule import AsrDataModule
from conformer import Conformer from conformer import Conformer
from decoder import Decoder from decoder import Decoder
from joiner import Joiner from joiner import Joiner
from lhotse import CutSet, load_manifest_lazy from lhotse import CutSet, load_manifest
from lhotse.cut import Cut from lhotse.cut import Cut
from lhotse.utils import fix_random_seed from lhotse.utils import fix_random_seed
from model import Transducer from model import Transducer
@@ -735,7 +735,7 @@ def run(rank, world_size, args):
train_datatang_cuts = train_datatang_cuts.repeat(times=None) train_datatang_cuts = train_datatang_cuts.repeat(times=None)
if args.enable_musan: if args.enable_musan:
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
Path(args.manifest_dir) / "musan_cuts.jsonl.gz" Path(args.manifest_dir) / "musan_cuts.jsonl.gz"
) )
else: else:

View File

@@ -27,6 +27,7 @@ from lhotse import (
CutSet, CutSet,
Fbank, Fbank,
FbankConfig, FbankConfig,
load_manifest,
load_manifest_lazy, load_manifest_lazy,
set_caching_enabled, set_caching_enabled,
) )
@@ -204,7 +205,7 @@ class AlimeetingAsrDataModule:
The state dict for the training sampler. The state dict for the training sampler.
""" """
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )

View File

@@ -20,7 +20,7 @@ import logging
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( from lhotse.dataset import (
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -189,7 +189,7 @@ class GigaSpeechAsrDataModule:
def train_dataloaders(self, cuts_train: CutSet) -> DataLoader: def train_dataloaders(self, cuts_train: CutSet) -> DataLoader:
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )

View File

@@ -23,7 +23,7 @@ from pathlib import Path
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import torch import torch
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( from lhotse.dataset import (
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -216,7 +216,7 @@ class GigaSpeechAsrDataModule:
if self.args.enable_musan: if self.args.enable_musan:
logging.info("Enable MUSAN") logging.info("Enable MUSAN")
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )
transforms.append( transforms.append(

View File

@@ -66,7 +66,7 @@ from conformer import Conformer
from decoder import Decoder from decoder import Decoder
from gigaspeech import GigaSpeech from gigaspeech import GigaSpeech
from joiner import Joiner from joiner import Joiner
from lhotse import CutSet, load_manifest_lazy from lhotse import CutSet, load_manifest
from lhotse.cut import Cut from lhotse.cut import Cut
from lhotse.dataset.sampling.base import CutSampler from lhotse.dataset.sampling.base import CutSampler
from lhotse.utils import fix_random_seed from lhotse.utils import fix_random_seed
@@ -968,7 +968,7 @@ def run(rank, world_size, args):
train_giga_cuts = train_giga_cuts.repeat(times=None) train_giga_cuts = train_giga_cuts.repeat(times=None)
if args.enable_musan: if args.enable_musan:
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
Path(args.manifest_dir) / "musan_cuts.jsonl.gz" Path(args.manifest_dir) / "musan_cuts.jsonl.gz"
) )
else: else:

View File

@@ -24,7 +24,7 @@ from pathlib import Path
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import torch import torch
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures from lhotse.dataset import ( # noqa F401 for PrecomputedFeatures
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -224,7 +224,7 @@ class LibriSpeechAsrDataModule:
if self.args.enable_musan: if self.args.enable_musan:
logging.info("Enable MUSAN") logging.info("Enable MUSAN")
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )
transforms.append( transforms.append(

View File

@@ -28,7 +28,7 @@ from pathlib import Path
from asr_datamodule import AsrDataModule from asr_datamodule import AsrDataModule
from gigaspeech import GigaSpeech from gigaspeech import GigaSpeech
from lhotse import load_manifest_lazy from lhotse import load_manifest
from librispeech import LibriSpeech from librispeech import LibriSpeech
@@ -41,7 +41,7 @@ def test_dataset():
print(args) print(args)
if args.enable_musan: if args.enable_musan:
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
Path(args.manifest_dir) / "musan_cuts.jsonl.gz" Path(args.manifest_dir) / "musan_cuts.jsonl.gz"
) )
else: else:

View File

@@ -73,7 +73,7 @@ from conformer import Conformer
from decoder import Decoder from decoder import Decoder
from gigaspeech import GigaSpeech from gigaspeech import GigaSpeech
from joiner import Joiner from joiner import Joiner
from lhotse import CutSet, load_manifest_lazy from lhotse import CutSet, load_manifest
from lhotse.cut import Cut from lhotse.cut import Cut
from lhotse.utils import fix_random_seed from lhotse.utils import fix_random_seed
from librispeech import LibriSpeech from librispeech import LibriSpeech
@@ -775,7 +775,7 @@ def run(rank, world_size, args):
train_giga_cuts = train_giga_cuts.repeat(times=None) train_giga_cuts = train_giga_cuts.repeat(times=None)
if args.enable_musan: if args.enable_musan:
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
Path(args.manifest_dir) / "musan_cuts.jsonl.gz" Path(args.manifest_dir) / "musan_cuts.jsonl.gz"
) )
else: else:

View File

@@ -22,7 +22,7 @@ from pathlib import Path
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import torch import torch
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( from lhotse.dataset import (
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -176,7 +176,7 @@ class SPGISpeechAsrDataModule:
The state dict for the training sampler. The state dict for the training sampler.
""" """
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "cuts_musan.jsonl.gz" self.args.manifest_dir / "cuts_musan.jsonl.gz"
) )

View File

@@ -22,7 +22,7 @@ import logging
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( from lhotse.dataset import (
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -179,7 +179,7 @@ class TedLiumAsrDataModule:
transforms = [] transforms = []
if self.args.enable_musan: if self.args.enable_musan:
logging.info("Enable MUSAN") logging.info("Enable MUSAN")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )
transforms.append( transforms.append(

View File

@@ -23,7 +23,7 @@ from functools import lru_cache
from pathlib import Path from pathlib import Path
from typing import List, Union from typing import List, Union
from lhotse import CutSet, Fbank, FbankConfig, load_manifest_lazy from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import ( from lhotse.dataset import (
CutConcatenate, CutConcatenate,
CutMix, CutMix,
@@ -154,7 +154,7 @@ class TimitAsrDataModule(DataModule):
cuts_train = self.train_cuts() cuts_train = self.train_cuts()
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.feature_dir / "cuts_musan.jsonl.gz" self.args.feature_dir / "cuts_musan.jsonl.gz"
) )

View File

@@ -27,6 +27,7 @@ from lhotse import (
CutSet, CutSet,
Fbank, Fbank,
FbankConfig, FbankConfig,
load_manifest,
load_manifest_lazy, load_manifest_lazy,
set_caching_enabled, set_caching_enabled,
) )
@@ -218,7 +219,7 @@ class WenetSpeechAsrDataModule:
The state dict for the training sampler. The state dict for the training sampler.
""" """
logging.info("About to get Musan cuts") logging.info("About to get Musan cuts")
cuts_musan = load_manifest_lazy( cuts_musan = load_manifest(
self.args.manifest_dir / "musan_cuts.jsonl.gz" self.args.manifest_dir / "musan_cuts.jsonl.gz"
) )