From 7920fa7726f6faf4fb783460750fa2756f9f31b6 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 12 Dec 2022 23:29:42 +0800
Subject: [PATCH 1/4] Add out_balancer for attention_squeeze, similar to
 nonlin_attention_module.

---
 .../ASR/pruned_transducer_stateless7/zipformer.py          | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index 65c2ffb0d..a122a112f 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1403,6 +1403,13 @@ class AttentionSqueeze(nn.Module):
                                      prob=_aux_grad_prob_out(),
                                      bias=False, initial_scale=0.05)
 
+        self.out_balancer = ActivationBalancer(
+            channels, channel_dim=-1,
+            min_positive=0.4, max_positive=0.5,
+            min_abs=ScheduledFloat((0.0, 0.002), (8000.0, 0.02), (20000.0, 0.01)),
+        )
+
+
     def forward(self,
                 x: Tensor,
                 attn_weights: Tensor):

From 0522425ea8a412d7ef828256c1dfe97b03ca3fa1 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 12 Dec 2022 23:30:12 +0800
Subject: [PATCH 2/4] Change min and max positive

---
 egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index a122a112f..958ddf2b0 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1405,7 +1405,7 @@ class AttentionSqueeze(nn.Module):
 
         self.out_balancer = ActivationBalancer(
             channels, channel_dim=-1,
-            min_positive=0.4, max_positive=0.5,
+            min_positive=0.3, max_positive=0.7,
             min_abs=ScheduledFloat((0.0, 0.002), (8000.0, 0.02), (20000.0, 0.01)),
         )
 
@@ -1539,7 +1539,7 @@ class NonlinAttentionModule(nn.Module):
 
         self.balancer2 = ActivationBalancer(
             channels, channel_dim=-1,
-            min_positive=0.4, max_positive=0.5,
+            min_positive=0.3, max_positive=0.7,
             min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.01), (20000.0, 0.005)),
         )
 

From b5e0676f14fdd1e7d9c930a0b605878c20eb3383 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 12 Dec 2022 23:31:22 +0800
Subject: [PATCH 3/4] Invoke the out_balancer of attention_squeeze

---
 egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index 958ddf2b0..908280572 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1445,6 +1445,7 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len)
         x = x * scales
         x = self.activation(x)  # Identity only. For diagnostics.
         x = self.out_proj(x)
+        x = self.out_balancer(x)
         return x
 
 

From d2465492f9ee85fb34c101ecfda047d21456269e Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 12 Dec 2022 23:32:08 +0800
Subject: [PATCH 4/4] Bug fix

---
 egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index 908280572..9f578f379 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1404,7 +1404,7 @@ class AttentionSqueeze(nn.Module):
                                      bias=False, initial_scale=0.05)
 
         self.out_balancer = ActivationBalancer(
-            channels, channel_dim=-1,
+            embed_dim, channel_dim=-1,
             min_positive=0.3, max_positive=0.7,
             min_abs=ScheduledFloat((0.0, 0.002), (8000.0, 0.02), (20000.0, 0.01)),
        )
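
Taken together, the four patches add an ActivationBalancer on the output path of AttentionSqueeze. The sketch below is not part of the patch series; it is a condensed, hypothetical illustration of the end state, assuming icefall's scaling.py (which provides ActivationBalancer and ScheduledFloat) is importable, and with embed_dim and the tensor layout chosen arbitrarily for illustration.

    import torch
    from scaling import ActivationBalancer, ScheduledFloat  # icefall's scaling.py

    embed_dim = 384  # hypothetical channel count, for illustration only

    # Constructed as after PATCH 4/4 (embed_dim, not channels), with the
    # limits widened in PATCH 2/4 from (0.4, 0.5) to (0.3, 0.7).
    out_balancer = ActivationBalancer(
        embed_dim, channel_dim=-1,
        min_positive=0.3, max_positive=0.7,
        min_abs=ScheduledFloat((0.0, 0.002), (8000.0, 0.02), (20000.0, 0.01)),
    )

    # Applied as in PATCH 3/4: to the output of out_proj(), just before
    # AttentionSqueeze.forward() returns.
    x = torch.randn(100, 2, embed_dim)  # assumed (seq_len, batch_size, embed_dim) layout
    x = out_balancer(x)

ActivationBalancer is an identity in the forward pass and only modifies gradients during training, so the new out_balancer should affect training dynamics without changing inference output.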