From 5f790c41f75e0f2bae7e24906851c1d52c6ee172 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Thu, 6 Apr 2023 23:55:25 +0800 Subject: [PATCH] Changes to bypass layer-skip-rate configuration. --- .../ASR/pruned_transducer_stateless7/zipformer.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 0328b20ed..33faeb4a3 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -207,7 +207,7 @@ class Zipformer2(EncoderInterface): dropout=dropout, warmup_begin=warmup_batches * (i + 1) / (num_encoders + 1), warmup_end=warmup_batches * (i + 2) / (num_encoders + 1), - final_layerdrop_rate=0.02 * (downsampling_factor[i] ** 0.5), + final_layerdrop_rate=0.035 * (downsampling_factor[i] ** 0.5), ) if downsampling_factor[i] != 1: @@ -531,7 +531,7 @@ class Zipformer2EncoderLayer(nn.Module): # self.bypass implements layer skipping as well as bypass; see its default values. self.bypass = BypassModule(embed_dim) # bypass_mid is bypass used in the middle of the layer. - self.bypass_mid = BypassModule(embed_dim) + self.bypass_mid = BypassModule(embed_dim, skip_rate=0.0) # skip probability for dynamic modules (meaning: anything but feedforward). @@ -816,9 +816,6 @@ class Zipformer2Encoder(nn.Module): self.layers[i].bypass.skip_rate = ScheduledFloat((cur_begin, initial_layerdrop_rate), (cur_end, final_layerdrop_rate), default=0.0) - self.layers[i].bypass_mid.skip_rate = ScheduledFloat((cur_begin, initial_layerdrop_rate), - (cur_end, final_layerdrop_rate), - default=0.0) cur_begin = cur_end def forward(