mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-24 01:06:13 +00:00
Support training for aec-iva and gss data
This commit is contained in:
parent
77d8a15288
commit
7310489dc9
@ -101,6 +101,12 @@ class ICMCAsrDataModule:
|
|||||||
default=Path("data/fbank"),
|
default=Path("data/fbank"),
|
||||||
help="Path to directory with train/valid/test cuts.",
|
help="Path to directory with train/valid/test cuts.",
|
||||||
)
|
)
|
||||||
|
group.add_argument(
|
||||||
|
"--manifest-aec-iva-dir",
|
||||||
|
type=Path,
|
||||||
|
default=Path("data/fbank"),
|
||||||
|
help="Path to directory with aec iva data train/valid/test cuts.",
|
||||||
|
)
|
||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--max-duration",
|
"--max-duration",
|
||||||
type=int,
|
type=int,
|
||||||
@ -400,51 +406,67 @@ class ICMCAsrDataModule:
|
|||||||
)
|
)
|
||||||
return test_dl
|
return test_dl
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def train_ihm_cuts(self) -> CutSet:
    """Return the train-ihm cuts.

    Logs the request, then hands ``cuts_train_ihm.jsonl.gz`` (resolved
    inside ``self.args.manifest_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    logging.info("About to get train-ihm cuts")
    manifest_path = self.args.manifest_dir / "cuts_train_ihm.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def train_ihm_rvb_cuts(self) -> CutSet:
    """Return the train-ihm-rvb cuts.

    Logs the request, then hands ``cuts_train_ihm_rvb.jsonl.gz``
    (resolved inside ``self.args.manifest_dir``) to
    ``load_manifest_lazy`` and returns its result.
    """
    logging.info("About to get train-ihm-rvb cuts")
    manifest_path = self.args.manifest_dir / "cuts_train_ihm_rvb.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def train_shm_cuts(self) -> CutSet:
    """Return the train-shm cuts.

    Logs the request, then hands ``cuts_train_sdm.jsonl.gz`` (resolved
    inside ``self.args.manifest_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    logging.info("About to get train-shm cuts")
    # NOTE(review): the method/log say "shm" while the file says "sdm";
    # kept exactly as in the original -- confirm which spelling is intended.
    manifest_path = self.args.manifest_dir / "cuts_train_sdm.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
def train_gss_cuts(self) -> CutSet:
    """Return the train-gss cuts.

    Logs the request, then hands ``cuts_train_gss.jsonl.gz`` (resolved
    inside ``self.args.manifest_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    logging.info("About to get train-gss cuts")
    manifest_path = self.args.manifest_dir / "cuts_train_gss.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
|
||||||
|
def train_aec_iva_cuts(self) -> CutSet:
    """Return the aec-iva training cuts.

    Logs the request, then hands
    ``icmcasr-aec-iva_cuts_train_aec_iva.jsonl.gz`` (resolved inside
    ``self.args.manifest_aec_iva_dir``, i.e. the ``--manifest-aec-iva-dir``
    option) to ``load_manifest_lazy`` and returns its result.
    """
    logging.info("About to get train-aec_iva cuts")
    # The call is now properly closed; the version shown in the diff never
    # terminated ``load_manifest_lazy(`` and therefore did not parse.
    return load_manifest_lazy(
        self.args.manifest_aec_iva_dir
        / "icmcasr-aec-iva_cuts_train_aec_iva.jsonl.gz"
    )
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def dev_ihm_cuts(self) -> CutSet:
    """Return the dev-ihm cuts.

    Logs the request, then hands ``cuts_dev_ihm.jsonl.gz`` (resolved
    inside ``self.args.manifest_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    logging.info("About to get dev-ihm cuts")
    manifest_path = self.args.manifest_dir / "cuts_dev_ihm.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
|
||||||
@lru_cache()
|
|
||||||
def dev_shm_cuts(self) -> CutSet:
    """Return the dev-shm cuts.

    Logs the request, then hands ``cuts_dev_sdm.jsonl.gz`` (resolved
    inside ``self.args.manifest_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    # The message used to read "dev-other", which names a split this method
    # does not load; it now mirrors the wording used by ``train_shm_cuts``.
    logging.info("About to get dev-shm cuts")
    return load_manifest_lazy(
        self.args.manifest_dir / "cuts_dev_sdm.jsonl.gz"
    )
|
||||||
|
def dev_aec_iva_cuts(self) -> CutSet:
    """Return the aec-iva dev cuts.

    Logs the request, then hands the aec-iva dev manifest (resolved inside
    ``self.args.manifest_aec_iva_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    logging.info("About to get aec iva dev cuts")
    # The previous version opened the *train* manifest
    # ("icmcasr-aec-iva_cuts_train_aec_iva.jsonl.gz") here, so validation
    # would have been computed on training data.
    # NOTE(review): the dev file name below is inferred from the train
    # file's naming pattern -- confirm it matches the prepared manifests.
    return load_manifest_lazy(
        self.args.manifest_aec_iva_dir
        / "icmcasr-aec-iva_cuts_dev_aec_iva.jsonl.gz"
    )
|
||||||
|
def dev_gss_cuts(self) -> CutSet:
    """Return the dev-gss cuts.

    Logs the request, then hands ``cuts_dev_gss.jsonl.gz`` (resolved
    inside ``self.args.manifest_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    logging.info("About to get dev-gss cuts")
    manifest_path = self.args.manifest_dir / "cuts_dev_gss.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
def test_eval_track1_gss(self) -> CutSet:
    """Return the eval1-gss cuts.

    Logs the request, then hands ``cuts_eval_track1_gss.jsonl.gz``
    (resolved inside ``self.args.manifest_dir``) to
    ``load_manifest_lazy`` and returns its result.
    """
    logging.info("About to get eval1-gss cuts")
    manifest_path = self.args.manifest_dir / "cuts_eval_track1_gss.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
def test_eval_track1_sdm(self) -> CutSet:
    """Return the eval1-sdm cuts.

    Logs the request, then hands ``cuts_eval_track1_sdm.jsonl.gz``
    (resolved inside ``self.args.manifest_dir``) to
    ``load_manifest_lazy`` and returns its result.
    """
    logging.info("About to get eval1-sdm cuts")
    manifest_path = self.args.manifest_dir / "cuts_eval_track1_sdm.jsonl.gz"
    return load_manifest_lazy(manifest_path)
|
||||||
|
def test_eval_track1_aec_iva(self) -> CutSet:
    """Return the eval1 aec-iva cuts.

    Logs the request, then hands ``cuts.jsonl.gz`` (resolved inside
    ``self.args.manifest_aec_iva_dir``) to ``load_manifest_lazy`` and
    returns its result.
    """
    # The message used to read "aec iva dev cuts" (copied from the dev
    # loader); it now follows the "eval1-*" wording of the sibling loaders.
    logging.info("About to get eval1-aec-iva cuts")
    # NOTE(review): "cuts.jsonl.gz" is a very generic file name compared
    # with the other manifests -- confirm it is the intended eval manifest.
    # The call is now properly closed; the version shown in the diff never
    # terminated ``load_manifest_lazy(``.
    return load_manifest_lazy(
        self.args.manifest_aec_iva_dir / "cuts.jsonl.gz"
    )
|
||||||
|
|
||||||
# @lru_cache()
|
|
||||||
# def test_clean_cuts(self) -> CutSet:
|
|
||||||
# logging.info("About to get test-clean cuts")
|
|
||||||
# return load_manifest_lazy(
|
|
||||||
# self.args.manifest_dir / "librispeech_cuts_test-clean.jsonl.gz"
|
|
||||||
# )
|
|
||||||
|
|
||||||
# @lru_cache()
|
|
||||||
# def test_other_cuts(self) -> CutSet:
|
|
||||||
# logging.info("About to get test-other cuts")
|
|
||||||
# return load_manifest_lazy(
|
|
||||||
# self.args.manifest_dir / "librispeech_cuts_test-other.jsonl.gz"
|
|
||||||
# )
|
|
||||||
|
@ -1179,6 +1179,8 @@ def run(rank, world_size, args):
|
|||||||
if params.full_data:
|
if params.full_data:
|
||||||
train_cuts += icmc.train_ihm_rvb_cuts()
|
train_cuts += icmc.train_ihm_rvb_cuts()
|
||||||
train_cuts += icmc.train_shm_cuts()
|
train_cuts += icmc.train_shm_cuts()
|
||||||
|
train_cuts += icmc.train_aec_iva_cuts()
|
||||||
|
train_cuts += icmc.train_gss_cuts()
|
||||||
|
|
||||||
def remove_short_and_long_utt(c: Cut):
|
def remove_short_and_long_utt(c: Cut):
|
||||||
# Keep only utterances with duration between 1 second and 20 seconds
|
# Keep only utterances with duration between 1 second and 20 seconds
|
||||||
@ -1231,7 +1233,8 @@ def run(rank, world_size, args):
|
|||||||
)
|
)
|
||||||
|
|
||||||
valid_cuts = icmc.dev_ihm_cuts()
|
valid_cuts = icmc.dev_ihm_cuts()
|
||||||
# valid_cuts += librispeech.dev_other_cuts()
|
valid_cuts += icmc.dev_gss_cuts()
|
||||||
|
valid_cuts += icmc.dev_aec_iva_cuts()
|
||||||
valid_dl = icmc.valid_dataloaders(valid_cuts)
|
valid_dl = icmc.valid_dataloaders(valid_cuts)
|
||||||
|
|
||||||
if not params.print_diagnostics:
|
if not params.print_diagnostics:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user