Increase penalty cutoff in NonlinAttention to 40.

This commit is contained in:
Daniel Povey 2023-05-29 23:02:59 +08:00
parent 09294c0b51
commit d0309c3f3d

View File

@ -1758,7 +1758,7 @@ class NonlinAttention(nn.Module):
     # ensure the activations after multiplication don't get too large.
     self.hidden_penalty = AbsValuePenalizer(
-        limit=10.0, penalty=1.0e-04, prob=0.1)
+        limit=40.0, penalty=1.0e-04, prob=0.1)
     self.out_proj = ScaledLinear(hidden_channels, channels,
                                  bias=True,