Modified train.py of tedlium3 models (#597)

This commit is contained in:
shcxlee 2022-10-02 00:01:15 -05:00 committed by GitHub
parent f3ad32777a
commit bf2c4a488e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 0 additions and 20 deletions

View File

@@ -658,18 +658,8 @@ def run(rank, world_size, args):
# Keep only utterances with duration between 1 second and 17 seconds
return 1.0 <= c.duration <= 17.0
num_in_total = len(train_cuts)
train_cuts = train_cuts.filter(remove_short_and_long_utt)
num_left = len(train_cuts)
num_removed = num_in_total - num_left
removed_percent = num_removed / num_in_total * 100
logging.info(f"Before removing short and long utterances: {num_in_total}")
logging.info(f"After removing short and long utterances: {num_left}")
logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)")
train_dl = tedlium.train_dataloaders(train_cuts)
valid_cuts = tedlium.dev_cuts()
valid_dl = tedlium.valid_dataloaders(valid_cuts)

View File

@@ -627,18 +627,8 @@ def run(rank, world_size, args):
# Keep only utterances with duration between 1 second and 17 seconds
return 1.0 <= c.duration <= 17.0
num_in_total = len(train_cuts)
train_cuts = train_cuts.filter(remove_short_and_long_utt)
num_left = len(train_cuts)
num_removed = num_in_total - num_left
removed_percent = num_removed / num_in_total * 100
logging.info(f"Before removing short and long utterances: {num_in_total}")
logging.info(f"After removing short and long utterances: {num_left}")
logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)")
train_dl = tedlium.train_dataloaders(train_cuts)
valid_cuts = tedlium.dev_cuts()
valid_dl = tedlium.valid_dataloaders(valid_cuts)