diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index b2de7232f..e1a91bae9 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -1085,7 +1085,6 @@ class RelPositionMultiheadAttention(nn.Module):
         q = q.permute(1, 2, 0, 3)  # (batch, head, time1, head_dim)
         p = p.permute(1, 2, 0, 3)  # (batch, head, time1, head_dim // 2)
 
-        # compute attention score
         k = k.permute(1, 2, 3, 0)  # (batch, head, d_k, time2)
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
index e5d4f73ad..88b78e92c 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@@ -121,7 +121,8 @@ def add_model_arguments(parser: argparse.ArgumentParser):
         "--attention-dims",
         type=str,
         default="192,192",
-        help="Attention dimension in the 2 blocks of conformer encoder layers, comma separated"
+        help="""Attention dimension in the 2 blocks of conformer encoder layers, comma separated;
+        not the same as embedding dimension."""
     )
 
     parser.add_argument(
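
For context on the updated help text: a minimal sketch (an assumption, not part of this diff) of how a comma-separated `--attention-dims` value such as the default `"192,192"` is typically turned into one integer per conformer encoder block. It only illustrates why the help string distinguishes the per-block attention dimensions from the embedding dimension; the actual parsing code in train.py is not shown here.

```python
# Hypothetical illustration: splitting the comma-separated --attention-dims value.
# The real parsing in train.py is outside this diff; this sketch just shows the
# intended shape of the option: one attention dimension per conformer block.
attention_dims = "192,192"  # default value of --attention-dims
dims = [int(d) for d in attention_dims.split(",")]
assert len(dims) == 2, "expected one attention dimension per conformer encoder block"
print(dims)  # [192, 192]
```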