From ba348169bfecbc8231fe21429aae6fa66fb4c740 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 25 Nov 2022 12:39:16 +0800
Subject: [PATCH] Change for diagnostic purposes, sigmoid of NonlinAttention.

---
 egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index 44e024bce..a973dd74b 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1431,6 +1431,7 @@ class NonlinAttentionModule(nn.Module):
             min_abs=0.2, max_abs=10.0,
             min_prob=0.05,
         )
+        self.sigmoid = nn.Sigmoid()
         self.activation = Identity() # for diagnostics.
 
         self.out_proj = ScaledLinear(channels, channels,
@@ -1471,7 +1472,7 @@ attn_weights: a Tensor of shape (num_heads, batch_size, seq_len, seq_len)
         v = self.whiten1(v)
 
         # GLU mechanism
-        x = s.sigmoid() * v
+        x = self.sigmoid(s) * v
 
         x = self.balancer(x)
         (seq_len, batch_size, embed_dim) = x.shape
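
Note on the change above: replacing the functional call s.sigmoid() with a
registered nn.Sigmoid submodule leaves the arithmetic of the GLU-style gating
unchanged, but (consistent with the subject line, "for diagnostic purposes")
a named submodule is visible to tooling that walks the module tree and
attaches hooks. The sketch below is a minimal, standalone illustration of
that gating pattern, not the icefall code itself; GatedExample and the tensor
shapes are hypothetical, chosen only to mirror the hunk.

import torch
import torch.nn as nn

class GatedExample(nn.Module):
    """Hypothetical stand-in for the gating step in NonlinAttentionModule."""
    def __init__(self):
        super().__init__()
        # Registering sigmoid as a submodule (rather than calling
        # s.sigmoid() inline) exposes the activation to module-level
        # diagnostic hooks.
        self.sigmoid = nn.Sigmoid()

    def forward(self, s: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
        # GLU mechanism: gate the values v elementwise by sigmoid(s).
        return self.sigmoid(s) * v

s = torch.randn(10, 2, 256)  # (seq_len, batch_size, channels); illustrative
v = torch.randn(10, 2, 256)
m = GatedExample()
out = m(s, v)
assert torch.allclose(out, s.sigmoid() * v)  # identical to the pre-patch code

# The named submodule can now be observed with a forward hook, e.g.:
m.sigmoid.register_forward_hook(lambda mod, inp, outp: print(outp.mean()))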