Increase penalty cutoff in NonlinAttention to 40.

This commit is contained in:
Daniel Povey 2023-05-29 23:02:59 +08:00
parent 09294c0b51
commit d0309c3f3d

View File

@ -1758,7 +1758,7 @@ class NonlinAttention(nn.Module):
     # ensure the activations after multiplication don't get too large.
     self.hidden_penalty = AbsValuePenalizer(
-        limit=10.0, penalty=1.0e-04, prob=0.1)
+        limit=40.0, penalty=1.0e-04, prob=0.1)
     self.out_proj = ScaledLinear(hidden_channels, channels,
                                  bias=True,