Further increase scales

This commit is contained in:
Daniel Povey 2022-05-22 12:25:26 +08:00
parent 9e206d53fc
commit b916789ca3

View File

@@ -175,7 +175,7 @@ class ConformerEncoderLayer(nn.Module):
DoubleSwish(), DoubleSwish(),
nn.Dropout(dropout), nn.Dropout(dropout),
ScaledLinear(dim_feedforward, d_model, ScaledLinear(dim_feedforward, d_model,
initial_scale=0.05), initial_scale=0.1),
) )
self.feed_forward_macaron = nn.Sequential( self.feed_forward_macaron = nn.Sequential(
@@ -184,7 +184,7 @@ class ConformerEncoderLayer(nn.Module):
DoubleSwish(), DoubleSwish(),
nn.Dropout(dropout), nn.Dropout(dropout),
ScaledLinear(dim_feedforward, d_model, ScaledLinear(dim_feedforward, d_model,
initial_scale=0.05), initial_scale=0.1),
) )
self.conv_module = ConvolutionModule(d_model, self.conv_module = ConvolutionModule(d_model,
@@ -440,7 +440,7 @@ class RelPositionMultiheadAttention(nn.Module):
self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True) self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
self.out_proj = ScaledLinear( self.out_proj = ScaledLinear(
embed_dim, embed_dim, bias=True, initial_scale=0.2 embed_dim, embed_dim, bias=True, initial_scale=0.5
) )
# linear transformation for positional encoding. # linear transformation for positional encoding.
@@ -904,7 +904,7 @@ class ConvolutionModule(nn.Module):
stride=1, stride=1,
padding=0, padding=0,
bias=bias, bias=bias,
initial_scale=0.2, initial_scale=0.5,
) )
def forward(self, x: Tensor) -> Tensor: def forward(self, x: Tensor) -> Tensor: