Mirror of https://github.com/k2-fsa/icefall.git, synced 2025-08-09 10:02:22 +00:00
changes to train script - no need for limiting utterance length here
commit fe9f975ec2 (parent e1f140a50e)
@@ -1 +1 @@
-/root/icefall/egs/librispeech/ASR/local/validate_bpe_lexicon.py
+/root/Github/reazon-icefall/egs/librispeech/ASR/local/validate_bpe_lexicon.py
@@ -1185,15 +1185,12 @@ def run(rank, world_size, args):
     train_cuts = multi_dataset.train_cuts()

     def remove_short_and_long_utt(c: Cut):
-        # Keep only utterances with duration between 1 second and 30 seconds
-        #
-        # Caution: There is a reason to select 30.0 here. Please see
-        # ../local/display_manifest_statistics.py
+        # Keep only utterances greater than 1 second
         #
         # You should use ../local/display_manifest_statistics.py to get
         # an utterance duration distribution for your dataset to select
-        # the threshold
-        if c.duration < 1.0 or c.duration > 30.0:
+        # the threshold as this is dependent on which datasets you choose
+        if c.duration < 1.0:
             logging.warning(
                 f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
             )
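For readers following along, here is a minimal, self-contained sketch of how a duration filter like the revised remove_short_and_long_utt is typically applied with lhotse before the cuts reach the sampler. The manifest path and the remove_short_utt helper are illustrative assumptions, not taken from this repository; only CutSet.filter(), CutSet.describe(), and Cut.duration are standard lhotse API.

    import logging

    from lhotse import CutSet, load_manifest_lazy
    from lhotse.cut import Cut


    def remove_short_utt(c: Cut) -> bool:
        # Return True to keep the cut.  Mirrors the relaxed filter in this
        # commit: drop anything shorter than 1 second, with no upper bound.
        if c.duration < 1.0:
            logging.warning(
                f"Exclude cut with ID {c.id} from training. Duration: {c.duration}"
            )
            return False
        return True


    # Hypothetical manifest path, for illustration only.
    train_cuts: CutSet = load_manifest_lazy("data/fbank/train_cuts.jsonl.gz")

    # CutSet.filter() is lazy: cuts are only dropped when the sampler iterates them.
    train_cuts = train_cuts.filter(remove_short_utt)

    # To choose a threshold, inspect the duration distribution first;
    # CutSet.describe() prints duration statistics for the manifest
    # (the recipe's ../local/display_manifest_statistics.py serves this purpose).
    # train_cuts.describe()

Because the filter is lazy, dropping the 30-second upper bound costs nothing at manifest-loading time; it only changes which cuts the sampler yields during training.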
@@ -1239,14 +1236,10 @@ def run(rank, world_size, args):
     else:
         sampler_state_dict = None

-    # train_dl = reazonspeech_corpus.train_dataloaders(
-    #     train_cuts, sampler_state_dict=sampler_state_dict
-    # )
     train_dl = multidataset_datamodule.train_dataloaders(
         train_cuts, sampler_state_dict=sampler_state_dict
     )

-
     valid_cuts = multi_dataset.dev_cuts()
     valid_dl = multidataset_datamodule.valid_dataloaders(valid_cuts)

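The sampler_state_dict threaded through train_dataloaders() above is what allows a run to resume mid-epoch. Below is a rough sketch of the underlying mechanism, assuming a lhotse DynamicBucketingSampler backs the dataloader; the save_sampler_state/make_resumed_sampler helpers and the max_duration value are illustrative, not from the train script.

    import torch

    from lhotse import CutSet
    from lhotse.dataset import DynamicBucketingSampler


    def save_sampler_state(sampler: DynamicBucketingSampler, path: str) -> None:
        # Lhotse samplers expose state_dict()/load_state_dict(), so the current
        # position within the epoch can be checkpointed alongside the model.
        torch.save({"sampler": sampler.state_dict()}, path)


    def make_resumed_sampler(cuts: CutSet, path: str) -> DynamicBucketingSampler:
        # Rebuild the sampler with the same settings, then restore its position.
        sampler = DynamicBucketingSampler(cuts, max_duration=200.0, shuffle=True)
        sampler_state_dict = torch.load(path, map_location="cpu")["sampler"]
        sampler.load_state_dict(sampler_state_dict)
        return sampler

In the train script the sampler state presumably travels inside the regular training checkpoint (hence the else: sampler_state_dict = None branch above); the sketch only isolates the state_dict()/load_state_dict() round trip.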
@@ -1393,7 +1386,6 @@ def main():
     MultiDatasetAsrDataModule.add_arguments(parser)
     args = parser.parse_args()
     args.exp_dir = Path(args.exp_dir)
-    print(args)

     world_size = args.world_size
     assert world_size >= 1