diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index 90449617d..673470f59 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -812,7 +812,6 @@ class EntropyPenaltyFunction(torch.autograd.Function):
                 num_heads: int,
                 entropy_limit: float,
                 grad_scale: float) -> Tensor:
-        logging.info("Here3")
         ctx.save_for_backward(attn_weights)
         ctx.num_heads = num_heads
         ctx.entropy_limit = entropy_limit
@@ -826,7 +825,6 @@ class EntropyPenaltyFunction(torch.autograd.Function):
         num_heads = ctx.num_heads
         entropy_limit = ctx.entropy_limit
         grad_scale = ctx.grad_scale
-        logging.info("Here4")
         with torch.enable_grad():
             with torch.cuda.amp.autocast(enabled=False):
                 attn_weights_orig = attn_weights.to(torch.float32).detach()
@@ -909,9 +907,7 @@ class EntropyPenalty(nn.Module):
          you use the returned attention weights, or the graph will be freed
          and nothing will happen in backprop.
         """
-        logging.info("Here1")
         if not attn_weights.requires_grad or random.random() > self.prob:
-            logging.info("Here2")
             return attn_weights
         else:
             seq_len = attn_weights.shape[2]