From 9f68b5717c84b91afae4d099b7aecf0930ee3d1a Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 21 Oct 2022 12:13:23 +0800
Subject: [PATCH] Reduce the limit on attention weights from 50 to 25.

---
 egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index 706bc41e3..41726c9fb 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -1116,7 +1116,7 @@ class RelPositionMultiheadAttention(nn.Module):
         # this mechanism instead of, say, a limit on entropy, because once the entropy
         # gets very small gradients through the softmax can become very small, and
         # some mechanisms like that become ineffective.
-        attn_weights_limit = 50.0
+        attn_weights_limit = 25.0
         # caution: this penalty will be affected by grad-scaling in amp.
         # It's OK; this is just an emergency brake, and under normal
         # conditions it shouldn't be active
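
Note (not part of the patch): the comments in the hunk refer to a penalty that discourages pre-softmax attention scores from exceeding attn_weights_limit in absolute value, acting only as an emergency brake. Below is a minimal sketch of one way such a limit could be enforced, assuming it is implemented as a small auxiliary loss; the function name attn_weights_penalty and the penalty scale are hypothetical, and the repository's actual mechanism may apply the penalty differently (for example, directly through a custom gradient hook rather than an explicit loss term).

import torch

def attn_weights_penalty(attn_scores: torch.Tensor,
                         limit: float = 25.0,
                         penalty_scale: float = 1.0e-04) -> torch.Tensor:
    # Hypothetical auxiliary loss: it is zero (with zero gradient) while all
    # pre-softmax attention scores stay within [-limit, limit], so under
    # normal conditions it is inactive and only acts as an emergency brake.
    excess = (attn_scores.abs() - limit).clamp(min=0.0)
    return penalty_scale * excess.sum()

# Usage sketch: add the penalty to the training loss.
scores = (30.0 * torch.randn(2, 4, 10, 10)).requires_grad_(True)  # some |scores| > 25
loss = attn_weights_penalty(scores)
loss.backward()  # gradients flow only through the out-of-range scores

Because the gradient of the penalty vanishes whenever the scores are within the limit, it does not interfere with training in the normal regime; this is the same reason the comments favor a hard limit on the weights over an entropy-based constraint, whose gradients become uselessly small exactly when they are needed.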