Increase initial scale for conv and self_attn

commit 9e206d53fc
parent 56d9928934
Author: Daniel Povey
Date:   2022-05-22 12:18:57 +08:00


@@ -440,7 +440,7 @@ class RelPositionMultiheadAttention(nn.Module):
         self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
         self.out_proj = ScaledLinear(
-            embed_dim, embed_dim, bias=True, initial_scale=0.05
+            embed_dim, embed_dim, bias=True, initial_scale=0.2
         )
         # linear transformation for positional encoding.
@@ -904,7 +904,7 @@ class ConvolutionModule(nn.Module):
             stride=1,
             padding=0,
             bias=bias,
-            initial_scale=0.05,
+            initial_scale=0.2,
         )

     def forward(self, x: Tensor) -> Tensor:
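
For context, a minimal sketch of the knob being turned, assuming ScaledLinear follows the pattern in icefall's scaling.py (a learned log-scale multiplying the weights, initialized to log(initial_scale)); the class below is an illustration under that assumption, not the repository's exact code:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F


    class ScaledLinear(nn.Linear):
        """Simplified sketch of a ScaledLinear layer: a Linear whose
        effective weight is weight * exp(weight_scale), with the learned
        scale initialized to log(initial_scale)."""

        def __init__(self, *args, initial_scale: float = 1.0, **kwargs):
            super().__init__(*args, **kwargs)
            # Keep the scale in log space so the effective scale stays positive.
            log_scale = torch.tensor(initial_scale).log()
            self.weight_scale = nn.Parameter(log_scale.clone())
            if self.bias is not None:
                self.bias_scale = nn.Parameter(log_scale.clone())

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            weight = self.weight * self.weight_scale.exp()
            bias = (
                self.bias * self.bias_scale.exp()
                if self.bias is not None
                else None
            )
            return F.linear(x, weight, bias)


    # At initialization the layer's output is initial_scale times that of a
    # plain nn.Linear; this commit raises that factor from 0.05 to 0.2.
    proj = ScaledLinear(256, 256, bias=True, initial_scale=0.2)

Under this reading, the attention output projection and the last layer of the convolution module start training with 4x larger output magnitude than before, while the scale itself remains learnable afterwards.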