diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
index 6ca21789c..305924e58 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@@ -123,7 +123,7 @@ def add_model_arguments(parser: argparse.ArgumentParser):
     parser.add_argument(
         "--num-encoder-layers",
         type=str,
-        default="2,4,4,4,4,4",
+        default="2,4,4,6,4,4",
         help="Number of zipformer encoder layers per stack, comma separated.",
     )
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index 7fec74a76..fc70150c9 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1946,7 +1946,7 @@ class Conv2dSubsampling(nn.Module):
         assert in_channels >= 7
         super().__init__()
 
-        # The ScalarMultiply modules are there to prevent the gradients
+        # The ScaleGrad module is there to prevent the gradients
         # w.r.t. the weight or bias of the first Conv2d module in self.conv from
         # exceeding the range of fp16 when using automatic mixed precision (amp)
         # training. (The second one is necessary to stop its bias from getting
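
For context, a gradient-scaling module of the kind the updated comment refers to is the identity in the forward pass and multiplies the incoming gradient by a fixed factor in the backward pass, which keeps the gradients w.r.t. the first Conv2d's weight and bias within fp16 range under amp. Below is a minimal sketch of that technique in PyTorch; the names and the 0.2 factor are illustrative assumptions, not necessarily the exact code in zipformer.py:

import torch
from torch import Tensor, nn


class ScaleGradFunction(torch.autograd.Function):
    """Identity in forward; scales the gradient by `alpha` in backward."""

    @staticmethod
    def forward(ctx, x: Tensor, alpha: float) -> Tensor:
        ctx.alpha = alpha
        return x

    @staticmethod
    def backward(ctx, grad: Tensor):
        # Shrink the gradient flowing back into the preceding Conv2d so its
        # weight/bias gradients stay within fp16 range during amp training.
        # The second return value is the (non-existent) gradient for alpha.
        return grad * ctx.alpha, None


class ScaleGrad(nn.Module):
    def __init__(self, alpha: float):
        super().__init__()
        self.alpha = alpha

    def forward(self, x: Tensor) -> Tensor:
        return ScaleGradFunction.apply(x, self.alpha)


# Illustrative placement inside a conv front-end (hypothetical layer sizes):
# self.conv = nn.Sequential(nn.Conv2d(1, 8, 3, padding=1), ScaleGrad(0.2), ...)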