Increase initial scale for conv and self_attn

2025-09-07 16:14:17 +00:00 · 2022-05-22 12:18:57 +08:00 · 2022-05-22 12:18:57 +08:00 · 9e206d53fc
commit 9e206d53fc
parent 56d9928934
1 changed file with 2 additions and 2 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py
@@ -440,7 +440,7 @@ class RelPositionMultiheadAttention(nn.Module):

        self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
        self.out_proj = ScaledLinear(
-            embed_dim, embed_dim, bias=True, initial_scale=0.05
+            embed_dim, embed_dim, bias=True, initial_scale=0.2
        )

        # linear transformation for positional encoding.
@@ -904,7 +904,7 @@ class ConvolutionModule(nn.Module):
            stride=1,
            padding=0,
            bias=bias,
-            initial_scale=0.05,
+            initial_scale=0.2,
        )

    def forward(self, x: Tensor) -> Tensor: