diff --git a/egs/librispeech/ASR/zipformer/optim.py b/egs/librispeech/ASR/zipformer/optim.py index a663db708..714d8db9a 100644 --- a/egs/librispeech/ASR/zipformer/optim.py +++ b/egs/librispeech/ASR/zipformer/optim.py @@ -466,6 +466,8 @@ class ScaledAdam(BatchedOptimizer): quartiles.append(sorted_norms[index].item()) median = quartiles[2] + if median - median != 0: + raise RuntimeError("Too many grads were not finite") threshold = clipping_scale * median if step in irregular_estimate_steps: # use larger thresholds on first few steps of estimating threshold,