From 9f68b5717c84b91afae4d099b7aecf0930ee3d1a Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 21 Oct 2022 12:13:23 +0800
Subject: [PATCH] Reduce the limit on attention weights from 50 to 25.

---
 egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index 706bc41e3..41726c9fb 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -1116,7 +1116,7 @@ class RelPositionMultiheadAttention(nn.Module):
         # this mechanism instead of, say, a limit on entropy, because once the entropy
         # gets very small gradients through the softmax can become very small, and
         # some mechanisms like that become ineffective.
-        attn_weights_limit = 50.0
+        attn_weights_limit = 25.0
         # caution: this penalty will be affected by grad-scaling in amp.
         # It's OK; this is just an emergency brake, and under normal
         # conditions it shouldn't be active
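
Note (not part of the patch): the comments in the hunk refer to a penalty that discourages pre-softmax attention scores from exceeding attn_weights_limit in absolute value, acting only as an emergency brake. Below is a minimal sketch of one way such a limit could be enforced, assuming it is implemented as a small auxiliary loss; the function name attn_weights_penalty and the penalty scale are hypothetical, and the repository's actual mechanism may apply the penalty differently (for example, directly through a custom gradient hook rather than an explicit loss term).

import torch

def attn_weights_penalty(attn_scores: torch.Tensor,
                         limit: float = 25.0,
                         penalty_scale: float = 1.0e-04) -> torch.Tensor:
    # Hypothetical auxiliary loss: it is zero (with zero gradient) while all
    # pre-softmax attention scores stay within [-limit, limit], so under
    # normal conditions it is inactive and only acts as an emergency brake.
    excess = (attn_scores.abs() - limit).clamp(min=0.0)
    return penalty_scale * excess.sum()

# Usage sketch: add the penalty to the training loss.
scores = (30.0 * torch.randn(2, 4, 10, 10)).requires_grad_(True)  # some |scores| > 25
loss = attn_weights_penalty(scores)
loss.backward()  # gradients flow only through the out-of-range scores

Because the gradient of the penalty vanishes whenever the scores are within the limit, it does not interfere with training in the normal regime; this is the same reason the comments favor a hard limit on the weights over an entropy-based constraint, whose gradients become uselessly small exactly when they are needed.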