diff --git a/egs/librispeech/ASR/zipformer/attention_decoder.py b/egs/librispeech/ASR/zipformer/attention_decoder.py
index 1400b9161..71be2d1eb 100644
--- a/egs/librispeech/ASR/zipformer/attention_decoder.py
+++ b/egs/librispeech/ASR/zipformer/attention_decoder.py
@@ -15,11 +15,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# The model structure is modified from Daniel Povey's Zipformer
-# https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
 
 import math
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
 import k2
 import torch
diff --git a/egs/librispeech/ASR/zipformer/train.py b/egs/librispeech/ASR/zipformer/train.py
index 704afda9c..88e3f773c 100755
--- a/egs/librispeech/ASR/zipformer/train.py
+++ b/egs/librispeech/ASR/zipformer/train.py
@@ -48,6 +48,7 @@ It supports training with:
   - transducer loss (default), with `--use-transducer True --use-ctc False`
   - ctc loss (not recommended), with `--use-transducer False --use-ctc True`
   - transducer loss & ctc loss, with `--use-transducer True --use-ctc True`
+  - ctc loss & attention decoder loss, with `--use-ctc True --use-attention-decoder True`
 """
 
 
@@ -917,7 +918,7 @@ def compute_loss(
     if params.use_ctc:
         info["ctc_loss"] = ctc_loss.detach().cpu().item()
     if params.use_attention_decoder:
-        info["attn_deocder_loss"] = attention_decoder_loss.detach().cpu().item()
+        info["attn_decoder_loss"] = attention_decoder_loss.detach().cpu().item()
 
     return loss, info
 