diff --git a/egs/librispeech/ASR/conformer_ctc2/attention.py b/egs/librispeech/ASR/conformer_ctc2/attention.py
index 16a50cf90..0f4313c17 100644
--- a/egs/librispeech/ASR/conformer_ctc2/attention.py
+++ b/egs/librispeech/ASR/conformer_ctc2/attention.py
@@ -62,6 +62,7 @@ class MultiheadAttention(nn.Module):
         embed_dim,
         num_heads,
         dropout=0.0,
+        bias=True,
         add_bias_kv=False,
         add_zero_attn=False,
         kdim=None,
@@ -78,8 +79,6 @@ class MultiheadAttention(nn.Module):
         self._qkv_same_embed_dim = (
             self.kdim == embed_dim and self.vdim == embed_dim
         )
-        self.bias = True
-        bias = self.bias
 
         self.num_heads = num_heads
         self.dropout = dropout
diff --git a/egs/librispeech/ASR/conformer_ctc2/decode.py b/egs/librispeech/ASR/conformer_ctc2/decode.py
index 7c659a698..45fa97812 100755
--- a/egs/librispeech/ASR/conformer_ctc2/decode.py
+++ b/egs/librispeech/ASR/conformer_ctc2/decode.py
@@ -125,7 +125,7 @@ def get_parser():
     parser.add_argument(
         "--use-averaged-model",
         type=str2bool,
-        default=False,
+        default=True,
         help="Whether to load averaged model. Currently it only supports "
         "using --epoch. If True, it would decode with the averaged model "
         "over the epoch range from `epoch-avg` (excluded) to `epoch`."