mirror of https://github.com/k2-fsa/icefall.git, synced 2025-08-09 18:12:19 +00:00

commit 261463cf27: Merge 723320e0159d65856d3c86ae4f75b9d44034fc3d into abd9437e6d5419a497707748eb935e50976c3b7b
@@ -1075,6 +1075,20 @@ def run(rank, world_size, args):
             )
             return False

+        # Zipformer has DownsampledZipformerEncoders with different downsampling factors
+        # after encoder_embed that does T -> (T - 7) // 2
+        ds = tuple(map(int, params.zipformer_downsampling_factors.split(",")))
+        max_ds = max(ds)
+        T = (c.num_frames - 7) // 2
+        if T < max_ds:
+            logging.warning(
+                f"Exclude cut with ID {c.id} from training. "
+                f"Number of frames (before encoder_embed): {c.num_frames}. "
+                f"Number of frames (after encoder_embed): {T}. "
+                f"Max downsampling factor in Zipformer: {max_ds}. "
+            )
+            return False
+
         return True

     train_cuts = train_cuts.filter(remove_short_and_long_utt)
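For intuition: encoder_embed subsamples T input frames to (T - 7) // 2 output frames, and a cut survives the new check only if that output length is at least the largest Zipformer downsampling factor. A minimal standalone sketch of the arithmetic, using made-up frame counts and an example value for params.zipformer_downsampling_factors:

# Sketch of the frame-count check above; all values are hypothetical examples.
downsampling_factors = "1,2,4,8,2"  # example zipformer_downsampling_factors string
max_ds = max(map(int, downsampling_factors.split(",")))  # -> 8

for num_frames in (10, 23, 100):
    T = (num_frames - 7) // 2  # frames left after encoder_embed
    print(f"num_frames={num_frames:3d} -> T={T:2d}, keep={T >= max_ds}")
# num_frames= 10 -> T= 1, keep=False
# num_frames= 23 -> T= 8, keep=True
# num_frames=100 -> T=46, keep=True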
@@ -1100,6 +1100,20 @@ def run(rank, world_size, args):
             )
             return False

+        # Zipformer has DownsampledZipformerEncoders with different downsampling factors
+        # after encoder_embed that does T -> (T - 7) // 2
+        ds = tuple(map(int, params.zipformer_downsampling_factors.split(",")))
+        max_ds = max(ds)
+        T = (c.num_frames - 7) // 2
+        if T < max_ds:
+            logging.warning(
+                f"Exclude cut with ID {c.id} from training. "
+                f"Number of frames (before encoder_embed): {c.num_frames}. "
+                f"Number of frames (after encoder_embed): {T}. "
+                f"Max downsampling factor in Zipformer: {max_ds}. "
+            )
+            return False
+
         return True

     train_cuts = train_cuts.filter(remove_short_and_long_utt)
@@ -1090,6 +1090,20 @@ def run(rank, world_size, args):
             )
             return False

+        # Zipformer has DownsampledZipformerEncoders with different downsampling factors
+        # after encoder_embed that does T -> (T - 7) // 2
+        ds = tuple(map(int, params.zipformer_downsampling_factors.split(",")))
+        max_ds = max(ds)
+        T = (c.num_frames - 7) // 2
+        if T < max_ds:
+            logging.warning(
+                f"Exclude cut with ID {c.id} from training. "
+                f"Number of frames (before encoder_embed): {c.num_frames}. "
+                f"Number of frames (after encoder_embed): {T}. "
+                f"Max downsampling factor in Zipformer: {max_ds}. "
+            )
+            return False
+
         return True

     train_cuts = train_cuts.filter(remove_short_and_long_utt)
@@ -1093,6 +1093,20 @@ def run(rank, world_size, args):
             )
             return False

+        # Zipformer has DownsampledZipformerEncoders with different downsampling factors
+        # after encoder_embed that does T -> (T - 7) // 2
+        ds = tuple(map(int, params.zipformer_downsampling_factors.split(",")))
+        max_ds = max(ds)
+        T = (c.num_frames - 7) // 2
+        if T < max_ds:
+            logging.warning(
+                f"Exclude cut with ID {c.id} from training. "
+                f"Number of frames (before encoder_embed): {c.num_frames}. "
+                f"Number of frames (after encoder_embed): {T}. "
+                f"Max downsampling factor in Zipformer: {max_ds}. "
+            )
+            return False
+
         return True

     # train_cuts = train_cuts.filter(remove_short_and_long_utt)
@@ -1016,7 +1016,7 @@ def train_one_epoch(


 def filter_short_and_long_utterances(
-    cuts: CutSet, sp: spm.SentencePieceProcessor
+    cuts: CutSet, sp: spm.SentencePieceProcessor, zipformer_downsampling_factors: str
 ) -> CutSet:
     def remove_short_and_long_utt(c: Cut):
         # Keep only utterances with duration between 1 second and 20 seconds
@@ -1053,6 +1053,20 @@ def filter_short_and_long_utterances(
             )
             return False

+        # Zipformer has DownsampledZipformerEncoders with different downsampling factors
+        # after encoder_embed that does T -> (T - 7) // 2
+        ds = tuple(map(int, zipformer_downsampling_factors.split(",")))
+        max_ds = max(ds)
+        T = (c.num_frames - 7) // 2
+        if T < max_ds:
+            logging.warning(
+                f"Exclude cut with ID {c.id} from training. "
+                f"Number of frames (before encoder_embed): {c.num_frames}. "
+                f"Number of frames (after encoder_embed): {T}. "
+                f"Max downsampling factor in Zipformer: {max_ds}. "
+            )
+            return False
+
         return True

     cuts = cuts.filter(remove_short_and_long_utt)
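Assembled from the two hunks above, the refactored helper reads roughly as follows. This is a sketch rather than the file verbatim: the duration/token checks that the diff context elides are reduced to the 1-20 second duration test named in the comment, and sp is kept in the signature only to match the real function:

import logging

import sentencepiece as spm
from lhotse import CutSet
from lhotse.cut import Cut


def filter_short_and_long_utterances(
    cuts: CutSet, sp: spm.SentencePieceProcessor, zipformer_downsampling_factors: str
) -> CutSet:
    def remove_short_and_long_utt(c: Cut) -> bool:
        # Keep only utterances with duration between 1 second and 20 seconds
        # (the real check also compares token counts against T; elided here).
        if c.duration < 1.0 or c.duration > 20.0:
            return False

        # Zipformer has DownsampledZipformerEncoders with different downsampling
        # factors after encoder_embed that does T -> (T - 7) // 2
        ds = tuple(map(int, zipformer_downsampling_factors.split(",")))
        max_ds = max(ds)
        T = (c.num_frames - 7) // 2
        if T < max_ds:
            logging.warning(
                f"Exclude cut with ID {c.id} from training. "
                f"Number of frames (before encoder_embed): {c.num_frames}. "
                f"Number of frames (after encoder_embed): {T}. "
                f"Max downsampling factor in Zipformer: {max_ds}. "
            )
            return False

        return True

    return cuts.filter(remove_short_and_long_utt)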
@@ -1167,7 +1181,9 @@ def run(rank, world_size, args):
     else:
         train_cuts = librispeech.train_clean_100_cuts()

-    train_cuts = filter_short_and_long_utterances(train_cuts, sp)
+    train_cuts = filter_short_and_long_utterances(
+        train_cuts, sp, params.zipformer_downsampling_factors
+    )

     gigaspeech = GigaSpeech(manifest_dir=args.manifest_dir)
     # XL 10k hours
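And a hypothetical call to the sketch above, mirroring the updated call site in the last hunk (the BPE model path, manifest path, and factor string are placeholders, not the recipe's actual values):

import sentencepiece as spm
from lhotse import CutSet

sp = spm.SentencePieceProcessor()
sp.load("data/lang_bpe_500/bpe.model")  # placeholder BPE model path

train_cuts = CutSet.from_file("data/fbank/cuts_train.jsonl.gz")  # placeholder manifest
train_cuts = filter_short_and_long_utterances(
    train_cuts, sp, "1,2,4,8,2"  # stands in for params.zipformer_downsampling_factors
)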