Minor fixes.

This commit is contained in:
Fangjun Kuang 2022-04-14 12:08:39 +08:00
parent 04d4423615
commit ec9bbf7352
3 changed files with 48 additions and 18 deletions

View File

@ -811,13 +811,23 @@ def run(rank, world_size, args):
train_cuts = train_cuts.filter(remove_short_and_long_utt) train_cuts = train_cuts.filter(remove_short_and_long_utt)
num_left = len(train_cuts) try:
num_removed = num_in_total - num_left num_left = len(train_cuts)
removed_percent = num_removed / num_in_total * 100 num_removed = num_in_total - num_left
removed_percent = num_removed / num_in_total * 100
logging.info(f"Before removing short and long utterances: {num_in_total}") logging.info(
logging.info(f"After removing short and long utterances: {num_left}") f"Before removing short and long utterances: {num_in_total}"
logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)") )
logging.info(f"After removing short and long utterances: {num_left}")
logging.info(
f"Removed {num_removed} utterances ({removed_percent:.5f}%)"
)
except TypeError as e:
# This error can be safely ignored. Older versions of Lhotse run the code
# above without problems; recent versions of Lhotse use a lazy filter that
# produces cut sets without a __len__ method, so len() raises a TypeError.
logging.info(str(e))
if params.start_batch > 0 and checkpoints and "sampler" in checkpoints: if params.start_batch > 0 and checkpoints and "sampler" in checkpoints:
# We only load the sampler's state dict when it loads a checkpoint # We only load the sampler's state dict when it loads a checkpoint

View File

@ -653,13 +653,23 @@ def run(rank, world_size, args):
train_cuts = train_cuts.filter(remove_short_and_long_utt) train_cuts = train_cuts.filter(remove_short_and_long_utt)
num_left = len(train_cuts) try:
num_removed = num_in_total - num_left num_left = len(train_cuts)
removed_percent = num_removed / num_in_total * 100 num_removed = num_in_total - num_left
removed_percent = num_removed / num_in_total * 100
logging.info(f"Before removing short and long utterances: {num_in_total}") logging.info(
logging.info(f"After removing short and long utterances: {num_left}") f"Before removing short and long utterances: {num_in_total}"
logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)") )
logging.info(f"After removing short and long utterances: {num_left}")
logging.info(
f"Removed {num_removed} utterances ({removed_percent:.5f}%)"
)
except TypeError as e:
# This error can be safely ignored. Older versions of Lhotse run the code
# above without problems; recent versions of Lhotse use a lazy filter that
# produces cut sets without a __len__ method, so len() raises a TypeError.
logging.info(str(e))
train_dl = librispeech.train_dataloaders(train_cuts) train_dl = librispeech.train_dataloaders(train_cuts)

View File

@ -641,13 +641,23 @@ def run(rank, world_size, args):
train_cuts = train_cuts.filter(remove_short_and_long_utt) train_cuts = train_cuts.filter(remove_short_and_long_utt)
num_left = len(train_cuts) try:
num_removed = num_in_total - num_left num_left = len(train_cuts)
removed_percent = num_removed / num_in_total * 100 num_removed = num_in_total - num_left
removed_percent = num_removed / num_in_total * 100
logging.info(f"Before removing short and long utterances: {num_in_total}") logging.info(
logging.info(f"After removing short and long utterances: {num_left}") f"Before removing short and long utterances: {num_in_total}"
logging.info(f"Removed {num_removed} utterances ({removed_percent:.5f}%)") )
logging.info(f"After removing short and long utterances: {num_left}")
logging.info(
f"Removed {num_removed} utterances ({removed_percent:.5f}%)"
)
except TypeError as e:
# This error can be safely ignored. Older versions of Lhotse run the code
# above without problems; recent versions of Lhotse use a lazy filter that
# produces cut sets without a __len__ method, so len() raises a TypeError.
logging.info(str(e))
train_dl = librispeech.train_dataloaders(train_cuts) train_dl = librispeech.train_dataloaders(train_cuts)