Use more attention heads in slowest layer.

Daniel Povey 2022-11-11 22:56:14 +08:00
parent f7aff4f507
commit 4988c815c9


@@ -127,7 +127,7 @@ def add_model_arguments(parser: argparse.ArgumentParser):
     parser.add_argument(
         "--num-heads",
         type=str,
-        default="8",
+        default="8,8,8,16,8,8",
         help="Number of attention heads in the zipformer encoder layers: a single int or comma-separated list.",
     )
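
As a minimal sketch of how a comma-separated "--num-heads" value like this could be expanded into one head count per zipformer encoder stack (parse_num_heads and num_stacks are hypothetical names for illustration, not the actual icefall helpers):

    # Hypothetical sketch: expand a "--num-heads" spec into per-stack values.
    def parse_num_heads(spec: str, num_stacks: int) -> tuple:
        values = tuple(int(v) for v in spec.split(","))
        if len(values) == 1:
            # A single int applies uniformly to every encoder stack.
            values = values * num_stacks
        assert len(values) == num_stacks, (
            f"expected 1 or {num_stacks} values, got {len(values)}"
        )
        return values

    # With the new default, the 4th stack (the most downsampled, i.e.
    # slowest frame rate) gets 16 heads while the others keep 8:
    print(parse_num_heads("8,8,8,16,8,8", num_stacks=6))
    # -> (8, 8, 8, 16, 8, 8)

Since the slowest stack runs at the lowest frame rate, giving it extra heads adds modeling capacity where the per-frame compute cost is cheapest.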