From 11a04c50ae15505c7c480963203531abe0c65e98 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Mon, 21 Mar 2022 21:29:24 +0800 Subject: [PATCH] Change 0.025,0.05 to 0.01 in initializations --- .../ASR/pruned_transducer_stateless2/conformer.py | 4 ++-- .../ASR/pruned_transducer_stateless2/scaling.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py index c6470b4a2..f778c9226 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py @@ -440,8 +440,8 @@ class RelPositionMultiheadAttention(nn.Module): return self.pos_bias_v * self.pos_bias_v_scale.exp() def _reset_parameters(self) -> None: - nn.init.normal_(self.pos_bias_u, std=0.05) - nn.init.normal_(self.pos_bias_v, std=0.05) + nn.init.normal_(self.pos_bias_u, std=0.01) + nn.init.normal_(self.pos_bias_v, std=0.01) def forward( self, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py index b358e5fa2..f2423492f 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py @@ -153,7 +153,7 @@ class ScaledLinear(nn.Linear): self._reset_parameters() # Overrides the reset_parameters in nn.Linear def _reset_parameters(self): - std = 0.025 + std = 0.01 a = (3 ** 0.5) * std nn.init.uniform_(self.weight, -a, a) if self.bias is not None: @@ -188,7 +188,7 @@ class ScaledConv1d(nn.Conv1d): self._reset_parameters() # Overrides the reset_parameters in base class def _reset_parameters(self): - std = 0.025 + std = 0.01 a = (3 ** 0.5) * std nn.init.uniform_(self.weight, -a, a) if self.bias is not None: @@ -229,7 +229,7 @@ class ScaledConv2d(nn.Conv2d): self._reset_parameters() # Overrides the reset_parameters in base class def _reset_parameters(self): - std = 0.025 + std = 0.01 a = (3 ** 0.5) * std nn.init.uniform_(self.weight, -a, a) if self.bias is not None: @@ -451,7 +451,7 @@ class ScaledEmbedding(nn.Module): def reset_parameters(self) -> None: - std = 0.025 + std = 0.01 nn.init.normal_(self.weight, std=std) nn.init.constant_(self.scale, torch.tensor(1.0/std).log())