Minor fixes.

This commit is contained in:
Fangjun Kuang 2022-06-05 23:44:58 +08:00
parent accf0afb45
commit 1235e23fbf
2 changed files with 4 additions and 4 deletions

View File

@@ -363,7 +363,7 @@ class GigaSpeechAsrDataModule:
def dev_cuts(self) -> CutSet: def dev_cuts(self) -> CutSet:
logging.info("About to get dev cuts") logging.info("About to get dev cuts")
cuts_valid = load_manifest_lazy( cuts_valid = load_manifest_lazy(
self.args.manifest_dir / "gigaspeech_cuts_DEV.jsonl.gz" self.args.manifest_dir / "cuts_DEV.jsonl.gz"
) )
if self.args.small_dev: if self.args.small_dev:
return cuts_valid.subset(first=1000) return cuts_valid.subset(first=1000)
@@ -373,6 +373,4 @@ class GigaSpeechAsrDataModule:
@lru_cache() @lru_cache()
def test_cuts(self) -> CutSet: def test_cuts(self) -> CutSet:
logging.info("About to get test cuts") logging.info("About to get test cuts")
return load_manifest_lazy( return load_manifest_lazy(self.args.manifest_dir / "cuts_TEST.jsonl.gz")
self.args.manifest_dir / "gigaspeech_cuts_TEST.jsonl.gz"
)

View File

@@ -700,6 +700,8 @@ def run(rank, world_size, args):
# the threshold # the threshold
return 1.0 <= c.duration <= 20.0 return 1.0 <= c.duration <= 20.0
train_cuts = train_cuts.filter(remove_short_and_long_utt)
train_dl = librispeech.train_dataloaders(train_cuts) train_dl = librispeech.train_dataloaders(train_cuts)
valid_cuts = librispeech.dev_clean_cuts() valid_cuts = librispeech.dev_clean_cuts()