Use 6 encoder stacks arranged as a U-shaped network (default downsampling factors 1,2,4,8,4,2).

2025-12-11 06:55:27 +00:00 · 2022-10-28 20:36:45 +08:00 · 2022-10-28 20:36:45 +08:00 · 96ea4cf1be
commit 96ea4cf1be
parent 7b57a34227
1 changed files with 8 additions and 8 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@ -93,35 +93,35 @@ def add_model_arguments(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--num-encoder-layers",
        type=str,
-        default="2,4,4,4",
+        default="2,3,3,3,3,3",
        help="Number of zipformer encoder layers, comma separated.",
    )

    parser.add_argument(
        "--feedforward-dims",
        type=str,
-        default="1024,1536,1536,1536",
+        default="1024,1024,1536,1536,1536,1024",
        help="Feedforward dimension of the zipformer encoder layers, comma separated.",
    )

    parser.add_argument(
        "--nhead",
        type=str,
-        default="8,8,8,8",
+        default="8,8,8,8,8,8",
        help="Number of attention heads in the zipformer encoder layers.",
    )

    parser.add_argument(
        "--encoder-dims",
        type=str,
-        default="384,384,384,512",
+        default="384,384,384,384,384,384",
        help="Embedding dimension in the 2 blocks of zipformer encoder layers, comma separated"
    )

    parser.add_argument(
        "--attention-dims",
        type=str,
-        default="192,192,192,256",
+        default="192,192,192,192,192,192",
        help="""Attention dimension in the 2 blocks of zipformer encoder layers, comma separated;
        not the same as embedding dimension."""
    )
@ -129,7 +129,7 @@ def add_model_arguments(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--encoder-unmasked-dims",
        type=str,
-        default="256,256,256,256",
+        default="256,256,256,256,256,256",
        help="Unmasked dimensions in the encoders, relates to augmentation during training.  "
        "Must be <= each of encoder_dims.  Empirically, less than 256 seems to make performance "
        " worse."
@ -138,14 +138,14 @@ def add_model_arguments(parser: argparse.ArgumentParser):
    parser.add_argument(
        "--zipformer-downsampling-factors",
        type=str,
-        default="1,2,4,8",
+        default="1,2,4,8,4,2",
        help="Downsampling factor for each stack of encoder layers.",
    )

    parser.add_argument(
        "--cnn-module-kernels",
        type=str,
-        default="31,31,31,31",
+        default="31,31,31,31,31,31",
        help="Sizes of kernels in convolution modules",
    )