diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py index cf2f05999..63adfa792 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py @@ -1020,7 +1020,7 @@ class Conv2dSubsampling(nn.Module): DoubleSwish(), ) out_height = (((in_channels - 1) // 2 - 1) // 2) - self.out = nn.Linear(out_height * layer3_channels, out_channels) + self.out = ScaledLinear(out_height * layer3_channels, out_channels) # set learn_eps=False because out_norm is preceded by `out`, and `out` # itself has learned scale, so the extra degree of freedom is not # needed.