mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-25 17:56:13 +00:00
Implement MUSAN noise augmentation and enable it by default
This commit is contained in:
parent
de7406f1b9
commit
c5c35859ec
@ -174,13 +174,16 @@ class ReazonSpeechAsrDataModule:
|
|||||||
group.add_argument(
|
group.add_argument(
|
||||||
"--enable-musan",
|
"--enable-musan",
|
||||||
type=str2bool,
|
type=str2bool,
|
||||||
default=False,
|
default=True,
|
||||||
help="When enabled, select noise from MUSAN and mix it"
|
help="When enabled, select noise from MUSAN and mix it"
|
||||||
"with training dataset. ",
|
"with training dataset. ",
|
||||||
)
|
)
|
||||||
|
|
||||||
def train_dataloaders(
|
def train_dataloaders(
|
||||||
self, cuts_train: CutSet, sampler_state_dict: Optional[Dict[str, Any]] = None
|
self,
|
||||||
|
cuts_train: CutSet,
|
||||||
|
sampler_state_dict: Optional[Dict[str, Any]] = None,
|
||||||
|
cuts_musan: Optional[CutSet] = None,
|
||||||
) -> DataLoader:
|
) -> DataLoader:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
@ -191,6 +194,14 @@ class ReazonSpeechAsrDataModule:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
transforms = []
|
transforms = []
|
||||||
|
if cuts_musan is not None:
|
||||||
|
logging.info("Enable MUSAN")
|
||||||
|
transforms.append(
|
||||||
|
CutMix(cuts=cuts_musan, p=0.5, snr=(10, 20), preserve_id=True)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logging.info("Disable MUSAN")
|
||||||
|
|
||||||
input_transforms = []
|
input_transforms = []
|
||||||
|
|
||||||
if self.args.enable_spec_aug:
|
if self.args.enable_spec_aug:
|
||||||
|
@ -1219,8 +1219,15 @@ def run(rank, world_size, args):
|
|||||||
else:
|
else:
|
||||||
sampler_state_dict = None
|
sampler_state_dict = None
|
||||||
|
|
||||||
|
if args.enable_musan:
|
||||||
|
cuts_musan = load_manifest(Path(args.manifest_dir) / "musan_cuts.jsonl.gz")
|
||||||
|
else:
|
||||||
|
cuts_musan = None
|
||||||
|
|
||||||
train_dl = reazonspeech_corpus.train_dataloaders(
|
train_dl = reazonspeech_corpus.train_dataloaders(
|
||||||
train_cuts, sampler_state_dict=sampler_state_dict
|
train_cuts,
|
||||||
|
sampler_state_dict=sampler_state_dict,
|
||||||
|
cuts_musan=cuts_musan,
|
||||||
)
|
)
|
||||||
|
|
||||||
valid_cuts = reazonspeech_corpus.valid_cuts()
|
valid_cuts = reazonspeech_corpus.valid_cuts()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user