diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index b2de7232f..e1a91bae9 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -1085,7 +1085,6 @@ class RelPositionMultiheadAttention(nn.Module):
         q = q.permute(1, 2, 0, 3)  # (batch, head, time1, head_dim)
         p = p.permute(1, 2, 0, 3)  # (batch, head, time1, head_dim // 2)
 
-        # compute attention score
         k = k.permute(1, 2, 3, 0)  # (batch, head, d_k, time2)
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
index e5d4f73ad..88b78e92c 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@@ -121,7 +121,8 @@ def add_model_arguments(parser: argparse.ArgumentParser):
         "--attention-dims",
         type=str,
         default="192,192",
-        help="Attention dimension in the 2 blocks of conformer encoder layers, comma separated"
+        help="""Attention dimension in the 2 blocks of conformer encoder layers, comma separated;
+        not the same as embedding dimension."""
     )
 
     parser.add_argument(
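
For context on the updated help text: a minimal sketch (an assumption, not part of this diff) of how a comma-separated `--attention-dims` value such as the default `"192,192"` is typically turned into one integer per conformer encoder block. It only illustrates why the help string distinguishes the per-block attention dimensions from the embedding dimension; the actual parsing code in train.py is not shown here.

```python
# Hypothetical illustration: splitting the comma-separated --attention-dims value.
# The real parsing in train.py is outside this diff; this sketch just shows the
# intended shape of the option: one attention dimension per conformer block.
attention_dims = "192,192"  # default value of --attention-dims
dims = [int(d) for d in attention_dims.split(",")]
assert len(dims) == 2, "expected one attention dimension per conformer encoder block"
print(dims)  # [192, 192]
```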