diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py index 0b51057cf..51a0f99e8 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py @@ -245,7 +245,6 @@ class CachingEvalFunction(torch.autograd.Function): # Caution: this assumes you are not going to use any random numbers from torch (for any purpose # that matters in the forward pass), e.g. there should be no dropout. ctx.random_state = random.getstate() - ctx.save_for_backward(x) # we are inside torch.no_grad() here, so the following won't create the computation graph. y = m(x) ctx.save_for_backward(x, y)