mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
load_manifest_lazy for asr_datamodule.py (#453)
This commit is contained in:
parent
29e407fd04
commit
c10aec5656
@@ -192,13 +192,6 @@ class WenetSpeechAsrDataModule:
|
|||||||
"with training dataset. ",
|
"with training dataset. ",
|
||||||
)
|
)
|
||||||
|
|
||||||
group.add_argument(
|
|
||||||
"--lazy-load",
|
|
||||||
type=str2bool,
|
|
||||||
default=True,
|
|
||||||
help="lazily open CutSets to avoid OOM (for L|XL subset)",
|
|
||||||
)
|
|
||||||
|
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--training-subset",
|
"--training-subset",
|
||||||
type=str,
|
type=str,
|
||||||
@@ -420,17 +413,10 @@ class WenetSpeechAsrDataModule:
|
|||||||
@lru_cache()
|
@lru_cache()
|
||||||
def train_cuts(self) -> CutSet:
|
def train_cuts(self) -> CutSet:
|
||||||
logging.info("About to get train cuts")
|
logging.info("About to get train cuts")
|
||||||
if self.args.lazy_load:
|
cuts_train = load_manifest_lazy(
|
||||||
logging.info("use lazy cuts")
|
self.args.manifest_dir
|
||||||
cuts_train = CutSet.from_jsonl_lazy(
|
/ f"cuts_{self.args.training_subset}.jsonl.gz"
|
||||||
self.args.manifest_dir
|
)
|
||||||
/ f"cuts_{self.args.training_subset}.jsonl.gz"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
cuts_train = CutSet.from_file(
|
|
||||||
self.args.manifest_dir
|
|
||||||
/ f"cuts_{self.args.training_subset}.jsonl.gz"
|
|
||||||
)
|
|
||||||
return cuts_train
|
return cuts_train
|
||||||
|
|
||||||
@lru_cache()
|
@lru_cache()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user