From d0309c3f3d3a16909a166691dc34f0c723670764 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 29 May 2023 23:02:59 +0800 Subject: [PATCH] Increase penalty cutoff in NonlinAttention to 40. --- egs/libriheavy/LM/zipformer1/subformer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/libriheavy/LM/zipformer1/subformer.py b/egs/libriheavy/LM/zipformer1/subformer.py index a6e0e2575..41f3e5234 100644 --- a/egs/libriheavy/LM/zipformer1/subformer.py +++ b/egs/libriheavy/LM/zipformer1/subformer.py @@ -1758,7 +1758,7 @@ class NonlinAttention(nn.Module): # ensure the activations after multiplication don't get too large. self.hidden_penalty = AbsValuePenalizer( - limit=10.0, penalty=1.0e-04, prob=0.1) + limit=40.0, penalty=1.0e-04, prob=0.1) self.out_proj = ScaledLinear(hidden_channels, channels, bias=True,