diff --git a/egs/librispeech/ASR/conformer_ctc2/attention.py b/egs/librispeech/ASR/conformer_ctc2/attention.py index 16a50cf90..0f4313c17 100644 --- a/egs/librispeech/ASR/conformer_ctc2/attention.py +++ b/egs/librispeech/ASR/conformer_ctc2/attention.py @@ -62,6 +62,7 @@ class MultiheadAttention(nn.Module): embed_dim, num_heads, dropout=0.0, + bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, @@ -78,8 +79,6 @@ class MultiheadAttention(nn.Module): self._qkv_same_embed_dim = ( self.kdim == embed_dim and self.vdim == embed_dim ) - self.bias = True - bias = self.bias self.num_heads = num_heads self.dropout = dropout diff --git a/egs/librispeech/ASR/conformer_ctc2/decode.py b/egs/librispeech/ASR/conformer_ctc2/decode.py index 7c659a698..45fa97812 100755 --- a/egs/librispeech/ASR/conformer_ctc2/decode.py +++ b/egs/librispeech/ASR/conformer_ctc2/decode.py @@ -125,7 +125,7 @@ def get_parser(): parser.add_argument( "--use-averaged-model", type=str2bool, - default=False, + default=True, help="Whether to load averaged model. Currently it only supports " "using --epoch. If True, it would decode with the averaged model " "over the epoch range from `epoch-avg` (excluded) to `epoch`."