From 9e206d53fcb56a301fe30c472029aedc32c4d56f Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sun, 22 May 2022 12:18:57 +0800
Subject: [PATCH] Increase initial scale for conv and self_attn

---
 egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py
index 8fd72cd33..4d6084d92 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless4/conformer.py
@@ -440,7 +440,7 @@ class RelPositionMultiheadAttention(nn.Module):
 
         self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
         self.out_proj = ScaledLinear(
-            embed_dim, embed_dim, bias=True, initial_scale=0.05
+            embed_dim, embed_dim, bias=True, initial_scale=0.2
         )
 
         # linear transformation for positional encoding.
@@ -904,7 +904,7 @@ class ConvolutionModule(nn.Module):
             stride=1,
             padding=0,
             bias=bias,
-            initial_scale=0.05,
+            initial_scale=0.2,
        )
 
     def forward(self, x: Tensor) -> Tensor:
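
Note on the change: ScaledLinear (and the ScaledConv1d used for the convolution module's
pointwise projection) comes from icefall's scaling.py, and initial_scale controls how large
the layer's effective parameters are when training starts. Raising it from 0.05 to 0.2 makes
the self-attention output projection and the conv module's final pointwise conv contribute
4x more strongly at initialization. The sketch below is a minimal approximation of that
mechanism, assuming ScaledLinear keeps learnable log-scales on weight and bias initialized
to log(initial_scale); the name ScaledLinearSketch is hypothetical and the real scaling.py
implementation has additional machinery not shown here.

import torch
import torch.nn as nn
import torch.nn.functional as F


class ScaledLinearSketch(nn.Linear):
    # Sketch only: approximates icefall's ScaledLinear, not the exact
    # implementation.  The effective weight is weight * exp(weight_scale),
    # so initial_scale sets the layer's output magnitude at the start of
    # training while the scale itself remains learnable and unbounded.
    def __init__(self, *args, initial_scale: float = 1.0, **kwargs):
        super().__init__(*args, **kwargs)
        log_scale = torch.tensor(float(initial_scale)).log()
        self.weight_scale = nn.Parameter(log_scale.clone())
        if self.bias is not None:
            self.bias_scale = nn.Parameter(log_scale.clone())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        weight = self.weight * self.weight_scale.exp()
        bias = self.bias * self.bias_scale.exp() if self.bias is not None else None
        return F.linear(x, weight, bias)


# With initial_scale=0.2 the output at initialization is 4x larger than
# with the previous 0.05, so the residual branches that end in this layer
# carry more signal from the first training step.
proj = ScaledLinearSketch(512, 512, bias=True, initial_scale=0.2)
y = proj(torch.randn(10, 512))

Under this reading, the patch is a pure initialization change: the learnable scales can
still move anywhere during training, but the two affected residual branches start out
less attenuated than before.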