diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py index 87034b3e3..0090eb1e8 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py @@ -107,7 +107,7 @@ def add_model_arguments(parser: argparse.ArgumentParser): parser.add_argument( "--num-encoder-layers", type=str, - default="4,4,4,4,4,4", + default="2,4,4,4,4,4", help="Number of zipformer encoder layers per stack, comma separated.", ) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 197362c9d..7dfa31e3b 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -1815,6 +1815,7 @@ class Conv2dSubsampling(nn.Module): ) self.convnext2 = nn.Sequential(ConvNeXt(layer3_channels), + ConvNeXt(layer3_channels), ConvNeXt(layer3_channels))