diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
index 6ca21789c..305924e58 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@@ -123,7 +123,7 @@ def add_model_arguments(parser: argparse.ArgumentParser):
     parser.add_argument(
         "--num-encoder-layers",
         type=str,
-        default="2,4,4,4,4,4",
+        default="2,4,4,6,4,4",
         help="Number of zipformer encoder layers per stack, comma separated.",
     )
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index 7fec74a76..fc70150c9 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1946,7 +1946,7 @@ class Conv2dSubsampling(nn.Module):
         assert in_channels >= 7
         super().__init__()
 
-        # The ScalarMultiply modules are there to prevent the gradients
+        # The ScaleGrad module is there to prevent the gradients
         # w.r.t. the weight or bias of the first Conv2d module in self.conv from
         # exceeding the range of fp16 when using automatic mixed precision (amp)
         # training. (The second one is necessary to stop its bias from getting
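
For context, a gradient-scaling module of the kind the updated comment refers to is the identity in the forward pass and multiplies the incoming gradient by a fixed factor in the backward pass, which keeps the gradients w.r.t. the first Conv2d's weight and bias within fp16 range under amp. Below is a minimal sketch of that technique in PyTorch; the names and the 0.2 factor are illustrative assumptions, not necessarily the exact code in zipformer.py:

import torch
from torch import Tensor, nn


class ScaleGradFunction(torch.autograd.Function):
    """Identity in forward; scales the gradient by `alpha` in backward."""

    @staticmethod
    def forward(ctx, x: Tensor, alpha: float) -> Tensor:
        ctx.alpha = alpha
        return x

    @staticmethod
    def backward(ctx, grad: Tensor):
        # Shrink the gradient flowing back into the preceding Conv2d so its
        # weight/bias gradients stay within fp16 range during amp training.
        # The second return value is the (non-existent) gradient for alpha.
        return grad * ctx.alpha, None


class ScaleGrad(nn.Module):
    def __init__(self, alpha: float):
        super().__init__()
        self.alpha = alpha

    def forward(self, x: Tensor) -> Tensor:
        return ScaleGradFunction.apply(x, self.alpha)


# Illustrative placement inside a conv front-end (hypothetical layer sizes):
# self.conv = nn.Sequential(nn.Conv2d(1, 8, 3, padding=1), ScaleGrad(0.2), ...)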