From 22b4a417dd64b941496642adf272294bcf9a893c Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Wed, 4 Jan 2023 20:59:58 +0800
Subject: [PATCH] Implement extra_layerdrop

---
 .../ASR/pruned_transducer_stateless7/zipformer.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index f7c9c487b..25a8dcf3a 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -199,6 +199,15 @@ class Zipformer(EncoderInterface):
                 attention_share_layers=attention_share_layers[i],
             )
 
+            # modify the layerdrop schedule with an extra schedule that takes longer
+            # to warm up for the less-downsampled layers; this encourages the more
+            # heavily downsampled layers to learn something.
+
+            extra_layerdrop = ScheduledFloat((0.0, 0.2), (20000.0 / downsampling_factor[i], 0.0))
+            for layer in encoder.layers:
+                # we can add objects of type ScheduledFloat.
+                layer.layer_skip_rate = layer.layer_skip_rate + extra_layerdrop
+
             if downsampling_factor[i] != 1:
                 encoder = DownsampledZipformerEncoder(
                     encoder,
@@ -208,6 +217,9 @@ class Zipformer(EncoderInterface):
                     dropout=dropout,
                 )
             encoders.append(encoder)
+
+
+
         self.encoders = nn.ModuleList(encoders)  # initializes self.skip_layers and self.skip_modules
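
For context (not part of the patch): ScheduledFloat, as I understand it from icefall's scaling.py, is a value that is piecewise-linear in the training batch count, and two schedules can be added, as the commit's comment notes. The sketch below is an illustration only: PiecewiseSchedule is a hypothetical stand-in class invented here (not the real ScheduledFloat), and the base layer_skip_rate breakpoints are assumed values chosen just to show the shape of the combined schedule for different downsampling factors.

# Illustrative sketch only, not part of the patch.  PiecewiseSchedule is a
# hypothetical stand-in for icefall's ScheduledFloat: a value that is
# piecewise-linear in the training batch count and that supports adding two
# schedules.  The base layer_skip_rate breakpoints below are assumed values.
from typing import Tuple


class PiecewiseSchedule:
    def __init__(self, *points: Tuple[float, float]):
        # points: (batch_count, value) pairs; linear interpolation in between,
        # clamped to the end values outside the range.
        self.points = sorted(points)

    def value(self, batch_count: float) -> float:
        pts = self.points
        if batch_count <= pts[0][0]:
            return pts[0][1]
        if batch_count >= pts[-1][0]:
            return pts[-1][1]
        for (x0, y0), (x1, y1) in zip(pts, pts[1:]):
            if x0 <= batch_count <= x1:
                t = (batch_count - x0) / (x1 - x0)
                return y0 + t * (y1 - y0)

    def __add__(self, other: "PiecewiseSchedule") -> "PiecewiseSchedule":
        # Sum two schedules by evaluating both at the union of their breakpoints.
        xs = sorted({x for x, _ in self.points} | {x for x, _ in other.points})
        return PiecewiseSchedule(*((x, self.value(x) + other.value(x)) for x in xs))


if __name__ == "__main__":
    base_skip_rate = PiecewiseSchedule((0.0, 0.5), (2000.0, 0.05))  # assumed values
    for ds in (1, 2, 4, 8):
        # The extra term from the patch: starts at 0.2 and decays to 0.0 by
        # 20000 / downsampling_factor batches, so stacks with a small
        # downsampling factor keep the extra layerdrop for longer.
        extra_layerdrop = PiecewiseSchedule((0.0, 0.2), (20000.0 / ds, 0.0))
        combined = base_skip_rate + extra_layerdrop
        print(ds, [round(combined.value(b), 3) for b in (0, 2000, 5000, 20000)])

In the patch itself the extra schedule is simply added onto whatever layer_skip_rate schedule each layer already has, so only the warm-up tail changes: after roughly 20000 / downsampling_factor[i] batches the extra term is zero and the layers drop out at their original rates.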