diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index cf2f05999..63adfa792 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -1020,7 +1020,7 @@ class Conv2dSubsampling(nn.Module):
             DoubleSwish(),
         )
         out_height = (((in_channels - 1) // 2 - 1) // 2)
-        self.out = nn.Linear(out_height * layer3_channels, out_channels)
+        self.out = ScaledLinear(out_height * layer3_channels, out_channels)
         # set learn_eps=False because out_norm is preceded by `out`, and `out`
         # itself has learned scale, so the extra degree of freedom is not
         # needed.