Reduce the limit on attention weights from 50 to 25.

This commit is contained in:
Daniel Povey 2022-10-21 12:13:23 +08:00
parent c5cb52fed1
commit 9f68b5717c


@@ -1116,7 +1116,7 @@ class RelPositionMultiheadAttention(nn.Module):
 # this mechanism instead of, say, a limit on entropy, because once the entropy
 # gets very small gradients through the softmax can become very small, and
 # some mechanisms like that become ineffective.
-attn_weights_limit = 50.0
+attn_weights_limit = 25.0
 # caution: this penalty will be affected by grad-scaling in amp.
 # It's OK; this is just an emergency brake, and under normal
 # conditions it shouldn't be active
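The comments in this hunk describe the limit as an "emergency brake": a penalty on pre-softmax attention scores whose magnitude exceeds `attn_weights_limit`, which is zero under normal conditions and only activates when scores blow up. A minimal sketch of such a penalty (the function name and the penalty scale are illustrative assumptions, not the actual icefall implementation):

```python
import torch

def attn_weights_penalty(attn_scores: torch.Tensor,
                         limit: float = 25.0,
                         scale: float = 1.0e-04) -> torch.Tensor:
    # Penalize pre-softmax attention scores whose absolute value exceeds
    # `limit`. When all scores are within the limit the excess is zero,
    # so the penalty contributes nothing: it acts only as an emergency
    # brake, as the comments above describe.
    excess = (attn_scores.abs() - limit).clamp(min=0.0)
    return scale * excess.sum()

# Example: scores shaped (batch, heads, query, key); an extra loss term
# that is added to the training loss only matters if scores run away.
scores = 10.0 * torch.randn(2, 4, 16, 16)
loss_extra = attn_weights_penalty(scores, limit=25.0)
```

Because the extra term is added to the loss, its gradient is multiplied by the AMP grad scaler like everything else, which is the caveat the comment about grad-scaling points at.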