diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index ba55ff6ea..706bc41e3 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -940,7 +940,6 @@ class RelPositionMultiheadAttention(nn.Module):
             training=self.training,
             key_padding_mask=key_padding_mask,
             attn_mask=attn_mask,
-            attn_weights_max=5.0 if self.training else None,
         )
         return x, weights
 
@@ -959,7 +958,6 @@ class RelPositionMultiheadAttention(nn.Module):
         training: bool = True,
         key_padding_mask: Optional[Tensor] = None,
         attn_mask: Optional[Tensor] = None,
-        attn_weights_max: Optional[float] = None,
     ) -> Tuple[Tensor, Optional[Tensor]]:
         r"""
         Args:
@@ -1111,16 +1109,6 @@ class RelPositionMultiheadAttention(nn.Module):
         # caution: they are really scores at this point.
         attn_output_weights = torch.matmul(q, k) + pos_weights
 
-        # The following is a soft way of encouraging the attention scores to not be too large;
-        # in training time, once they get outside a certain range, -5.0..5.0 currently, we
-        # randomly either leave them as-is or truncate them to that range.
-        if attn_weights_max is not None:
-            attn_output_weights = random_clamp(attn_output_weights,
-                                               min=-attn_weights_max,
-                                               max=attn_weights_max,
-                                               prob=0.5,
-                                               reflect=0.1)
-
         if training and random.random() < 0.1:
             # This is a harder way of limiting the attention scores to not be too large.
             # It incurs a penalty if any of them has an absolute value greater than 50.0.
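
The removed block was, per its own comment, a "soft" limiter on the attention scores: during training, scores outside roughly -5.0..5.0 were randomly either left as-is or truncated to that range. As a rough illustration only (this is not the icefall `random_clamp` implementation; the function name below is hypothetical, and the real function's `reflect` argument and any custom backward behavior are not shown in this diff), a minimal forward-only sketch of that behavior might look like:

```python
import torch
from torch import Tensor


def random_clamp_sketch(x: Tensor,
                        min: float = -5.0,
                        max: float = 5.0,
                        prob: float = 0.5) -> Tensor:
    """Illustrative stand-in for the removed random_clamp call.

    Each element outside [min, max] is truncated to that range with
    probability `prob` and left unchanged otherwise, mirroring the
    behavior described in the removed comment.  The `reflect` argument
    of the real random_clamp is omitted because its meaning is not
    visible in the diff.
    """
    clamped = x.clamp(min=min, max=max)
    # Elementwise coin flip: True -> use the clamped value.
    use_clamped = torch.rand_like(x) < prob
    return torch.where(use_clamped, clamped, x)


# Example: attention scores with large outliers; roughly half of the
# out-of-range entries end up truncated to [-5, 5].
scores = torch.randn(2, 4, 4) * 10.0
limited = random_clamp_sketch(scores)
```

After this change, only the "harder" limiter kept below in the diff (the penalty on scores whose absolute value exceeds 50.0, applied on a random 10% of training batches) remains.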