diff --git a/egs/librispeech/ASR/zipformer/zipformer.py b/egs/librispeech/ASR/zipformer/zipformer.py
index 85ebdb56e..660bdeb1d 100644
--- a/egs/librispeech/ASR/zipformer/zipformer.py
+++ b/egs/librispeech/ASR/zipformer/zipformer.py
@@ -1305,11 +1305,11 @@ class CompactRelPositionalEncoding(torch.nn.Module):
     ) -> None:
         """Construct a CompactRelPositionalEncoding object."""
         super(CompactRelPositionalEncoding, self).__init__()
-        if torch.jit.is_tracing:
-            # 10k frames correspond to ~100k ms, e.g., 100 seconds, i.e., 
-            # It assumes that the maximum input won't have more than 
-            # 10k frames. 
-            #
+        if torch.jit.is_tracing():
+            # Assume that the input, after downsampling, has at most 10k frames.
+            # The first downsampling factor is 2, so the raw input should
+            # contain fewer than 20k frames, i.e., less than 200 seconds
+            # (about 3.33 minutes).
             max_len = 10000
         self.embed_dim = embed_dim
         assert embed_dim % 2 == 0