From 56efdcda49000b9dd32f63a739744fb26ee0b648 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 21 Nov 2022 21:07:32 +0800
Subject: [PATCH] Reduce whitening limit to 10 and move it to the beginning.

---
 .../ASR/pruned_transducer_stateless7/zipformer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index beac0bb40..896d84032 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1423,7 +1423,7 @@ class NonlinAttentionModule(nn.Module):
             min_prob=0.1,
         )
         self.whiten = Whiten(num_groups=1,
-                             whitening_limit=20.0,
+                             whitening_limit=10.0,
                              prob=(0.025, 0.25),
                              grad_scale=0.01)

@@ -1444,7 +1444,9 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len)
         Returns:
            a Tensor with the same shape as x
         """
-        v, s = self.in_proj(x).chunk(2, dim=-1)
+        x = self.in_proj(x)
+        x = self.whiten(x)
+        v, s = x.chunk(2, dim=-1)

         if self.training and random.random() < 0.02:
             # prevent the inputs to the sigmoid from getting very large (this is
@@ -1455,7 +1457,6 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len)
         # GLU mechanism
         x = s.sigmoid() * v
         x = self.balancer(x)
-        x = self.whiten(x)

         (seq_len, batch_size, embed_dim) = x.shape
         num_heads = attn_weights.shape[0]
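
For context, below is a minimal, self-contained sketch of what the forward path of NonlinAttentionModule looks like after this patch: whitening is applied to the in_proj output before the GLU split, rather than after the balancer. This is not the real module; Whiten and ActivationBalancer are stood in for by nn.Identity() (in the actual code they constrain activation statistics without changing shapes), the per-head mixing with attn_weights is elided, and the class/parameter names here are only illustrative.

import torch
import torch.nn as nn


class NonlinAttentionSketch(nn.Module):
    """Illustrative stand-in for NonlinAttentionModule (shapes only)."""

    def __init__(self, embed_dim: int) -> None:
        super().__init__()
        # in_proj doubles the channel dim so the output can be split into (v, s).
        self.in_proj = nn.Linear(embed_dim, 2 * embed_dim)
        self.whiten = nn.Identity()    # stand-in for Whiten(..., whitening_limit=10.0, ...)
        self.balancer = nn.Identity()  # stand-in for ActivationBalancer(...)
        self.out_proj = nn.Linear(embed_dim, embed_dim)

    def forward(self, x: torch.Tensor, attn_weights: torch.Tensor) -> torch.Tensor:
        # (seq_len, batch, embed_dim) -> (seq_len, batch, 2 * embed_dim)
        x = self.in_proj(x)
        # After this patch the whitening constraint acts on the in_proj output,
        # i.e. before the GLU split, instead of after the balancer.
        x = self.whiten(x)
        v, s = x.chunk(2, dim=-1)

        # GLU mechanism
        x = s.sigmoid() * v
        x = self.balancer(x)

        # The real module then mixes x along the time axis using attn_weights
        # (one matmul per head); that step is omitted in this sketch.
        return self.out_proj(x)


# Shape check only, to show the sketch runs.
seq_len, batch, embed_dim, num_heads = 10, 2, 16, 4
m = NonlinAttentionSketch(embed_dim)
x = torch.randn(seq_len, batch, embed_dim)
attn = torch.softmax(torch.randn(num_heads, batch, seq_len, seq_len), dim=-1)
print(m(x, attn).shape)  # torch.Size([10, 2, 16])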