mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 01:52:41 +00:00
load_manifest_lazy for asr_datamodule.py (#453)
This commit is contained in:
parent
29e407fd04
commit
c10aec5656
@ -192,13 +192,6 @@ class WenetSpeechAsrDataModule:
|
||||
"with training dataset. ",
|
||||
)
|
||||
|
||||
group.add_argument(
|
||||
"--lazy-load",
|
||||
type=str2bool,
|
||||
default=True,
|
||||
help="lazily open CutSets to avoid OOM (for L|XL subset)",
|
||||
)
|
||||
|
||||
group.add_argument(
|
||||
"--training-subset",
|
||||
type=str,
|
||||
@ -420,14 +413,7 @@ class WenetSpeechAsrDataModule:
|
||||
@lru_cache()
|
||||
def train_cuts(self) -> CutSet:
|
||||
logging.info("About to get train cuts")
|
||||
if self.args.lazy_load:
|
||||
logging.info("use lazy cuts")
|
||||
cuts_train = CutSet.from_jsonl_lazy(
|
||||
self.args.manifest_dir
|
||||
/ f"cuts_{self.args.training_subset}.jsonl.gz"
|
||||
)
|
||||
else:
|
||||
cuts_train = CutSet.from_file(
|
||||
cuts_train = load_manifest_lazy(
|
||||
self.args.manifest_dir
|
||||
/ f"cuts_{self.args.training_subset}.jsonl.gz"
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user