Move whitening of NonlinAttentionModule from the output to the interior; apply it just to the value.

Daniel Povey 2022-11-24 13:27:32 +08:00
parent ee61ec63b3
commit de73e2e424


@@ -1464,6 +1464,7 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len)
 # very small probability to save time).
 s = penalize_abs_values_gt(s, limit=20.0, penalty=1.0e-04)
+v = self.whiten(v)
 # GLU mechanism
 x = s.sigmoid() * v
 x = self.balancer(x)
@@ -1480,7 +1481,6 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len)
 x = self.activation(x)  # diagnostics only, it's the identity.
 x = self.out_proj(x)
-x = self.whiten(x)
 return x
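
To illustrate what the change does, here is a minimal, self-contained PyTorch sketch of the gating path touched by this commit. It is not the real NonlinAttentionModule from zipformer.py: the class name NonlinAttentionSketch, the in_proj/out_proj layers, and the channel sizes are made up for the example, the attention weighting, balancer, and penalize_abs_values_gt steps are omitted, and the real Whiten module is replaced with nn.Identity as a stand-in. It only shows where whitening sits after this commit: on the value v before the sigmoid gate, rather than on the module output.

import torch
import torch.nn as nn

class NonlinAttentionSketch(nn.Module):
    # Simplified stand-in for the gating ("GLU mechanism") inside
    # NonlinAttentionModule; names and sizes here are illustrative only.
    def __init__(self, channels: int):
        super().__init__()
        self.in_proj = nn.Linear(channels, 2 * channels)  # produces (s, v)
        self.whiten = nn.Identity()   # placeholder for the real Whiten module
        self.out_proj = nn.Linear(channels, channels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        s, v = self.in_proj(x).chunk(2, dim=-1)
        v = self.whiten(v)        # after this commit: whiten only the value
        x = s.sigmoid() * v       # GLU mechanism
        x = self.out_proj(x)
        return x                  # no whitening of the output any more

m = NonlinAttentionSketch(channels=256)
y = m(torch.randn(10, 4, 256))
print(y.shape)  # torch.Size([10, 4, 256])

In words: previously the whitening constraint was applied to the final output x after out_proj; with this commit it constrains v, the value branch of the gate, so the sigmoid gate s and the output projection are no longer directly affected by it.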