diff --git a/egs/librispeech/ASR/zipformer/optim.py b/egs/librispeech/ASR/zipformer/optim.py
index a663db708..714d8db9a 100644
--- a/egs/librispeech/ASR/zipformer/optim.py
+++ b/egs/librispeech/ASR/zipformer/optim.py
@@ -466,6 +466,8 @@ class ScaledAdam(BatchedOptimizer):
                 quartiles.append(sorted_norms[index].item())
 
             median = quartiles[2]
+            if median - median != 0:
+                raise RuntimeError("Too many grads were not finite")
             threshold = clipping_scale * median
             if step in irregular_estimate_steps:
                 # use larger thresholds on first few steps of estimating threshold,