Merge branch 'pradam_exp1n2' into pradam_exp1m7s2

This commit is contained in:
Daniel Povey 2022-08-24 04:14:25 +08:00
commit 80beb9c8d7

View File

@@ -468,8 +468,7 @@ class RelPositionMultiheadAttention(nn.Module):
self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
self.in_balancer = ActivationBalancer(channel_dim=-1, max_abs=5.0)
-self.proj_balancer = ActivationBalancer(channel_dim=-1, min_positive=0.0,
-max_positive=1.0, max_abs=10.0)
+self.proj_balancer = ActivationBalancer(channel_dim=-1, max_abs=10.0)
self.out_proj = ScaledLinear(
embed_dim, embed_dim, bias=True, initial_scale=0.5
)