From ff4028df8e7a13048ed72965fd023821b9a00497 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 7 Oct 2022 17:19:23 +0800 Subject: [PATCH] Revert initial_scale to previous values. --- .../ASR/pruned_transducer_stateless7/conformer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py index c987f3000..1c084f92e 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py @@ -268,7 +268,7 @@ class ConformerEncoderLayer(nn.Module): DoubleSwish(), nn.Dropout(dropout), ScaledLinear(feedforward_dim, d_model, - initial_scale=0.01), + initial_scale=0.1), ) self.feed_forward_macaron = nn.Sequential( @@ -278,7 +278,7 @@ class ConformerEncoderLayer(nn.Module): DoubleSwish(), nn.Dropout(dropout), ScaledLinear(feedforward_dim, d_model, - initial_scale=0.01), + initial_scale=0.1), ) self.conv_module = ConvolutionModule(d_model, @@ -783,7 +783,7 @@ class RelPositionMultiheadAttention(nn.Module): channel_dim=-1, max_abs=10.0, min_positive=0.0, max_positive=1.0) self.out_proj = ScaledLinear( - embed_dim // 2, embed_dim, bias=True, initial_scale=0.05 + embed_dim // 2, embed_dim, bias=True, initial_scale=0.5 ) self.attn_scores_proj_in = nn.Parameter(torch.eye(num_heads)) @@ -1201,7 +1201,7 @@ class ConvolutionModule(nn.Module): stride=1, padding=0, bias=bias, - initial_scale=0.05, + initial_scale=0.5, ) def forward(self,