From 117d418e27f53efb6ac4cb06dd49e9c930b2eae1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 13 Dec 2022 19:17:38 +0800 Subject: [PATCH 1/2] Make nonlin_skip_rate nonzero and end after 20k iters; remove peak at 8k iters of NonlinAttentionModule balancer2 min_abs. --- egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 360993621..7ebb35b73 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -399,7 +399,7 @@ class ZipformerEncoderLayer(nn.Module): # to work correctly. layer_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), (4000.0, 0.05), default=0), dynamic_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (4000.0, 0.0), default=0), - nonlin_skip_rate: FloatLike = 0.0, + nonlin_skip_rate: FloatLike = ScheduledFloat((0.0, 0.1), (20000, 0.0), default=0), const_attention_rate: FloatLike = ScheduledFloat((0.0, 0.25), (4000.0, 0.025), default=0), bypass_min: FloatLike = ScheduledFloat((0.0, 0.75), (20000.0, 0.2), default=0), bypass_max: FloatLike = 1.0, From 52d18e405e4b0faf61efae2af611aa997afc872f Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 13 Dec 2022 19:22:43 +0800 Subject: [PATCH 2/2] Change to balancer2 schedule of NonlinAttentionModule, remove peak at 8k. 
--- egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 7ebb35b73..99433796f 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -1541,7 +1541,7 @@ class NonlinAttentionModule(nn.Module): self.balancer2 = ActivationBalancer( channels, channel_dim=-1, min_positive=0.3, max_positive=0.7, - min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.01), (20000.0, 0.005)), + min_abs=ScheduledFloat((0.0, 0.001), (20000.0, 0.005)), )