diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py
index 64030ef90..fae91aa71 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py
@@ -231,7 +231,8 @@ class ConformerEncoderLayer(nn.Module):
         # period we sometimes use scale 1.0; this ensures that the modules do not
         # compensate for the small scale by just producing larger output.
         warmup = max(warmup, 0.1)
-        warmup = min(warmup, 0.95)  # effectively, layer-drop.
+        if self.training:
+            warmup = min(warmup, 0.95)  # effectively, layer-drop.
         alpha = 1.0 if torch.rand(()).item() <= warmup else 0.1

         # macaron style feed forward module
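
For context, the change guards the 0.95 clamp with `self.training`, so the random layer-drop behavior (alpha falling to 0.1 with probability 0.05) only occurs during training and evaluation-time forward passes stay deterministic. Below is a minimal, self-contained sketch of the warmup/alpha mechanism, not the actual icefall `ConformerEncoderLayer`; the `ToyConformerLayer` name and its simple residual mix are illustrative assumptions.

```python
# Sketch only: a toy layer reproducing the warmup/layer-drop logic from the
# diff, to show the effect of gating the clamp on self.training.
import torch
import torch.nn as nn


class ToyConformerLayer(nn.Module):  # hypothetical stand-in, not the icefall class
    def __init__(self, d_model: int = 8):
        super().__init__()
        self.linear = nn.Linear(d_model, d_model)

    def forward(self, x: torch.Tensor, warmup: float = 1.0) -> torch.Tensor:
        warmup = max(warmup, 0.1)
        if self.training:
            # Clamping warmup below 1.0 means that, with probability 0.05,
            # alpha becomes 0.1 and the layer is mostly bypassed (layer-drop).
            warmup = min(warmup, 0.95)
        # Without the self.training guard above, eval-time passes would also
        # randomly down-weight layers, making inference non-deterministic.
        alpha = 1.0 if torch.rand(()).item() <= warmup else 0.1
        y = self.linear(x)
        return alpha * y + (1.0 - alpha) * x


if __name__ == "__main__":
    layer = ToyConformerLayer()
    x = torch.randn(2, 8)
    layer.eval()
    with torch.no_grad():
        # In eval mode warmup stays at 1.0, so alpha is always 1.0 and two
        # forward passes on the same input produce identical outputs.
        assert torch.allclose(layer(x, warmup=1.0), layer(x, warmup=1.0))
```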