Mirror of https://github.com/k2-fsa/icefall.git, synced 2025-12-11 06:55:27 +00:00
Reduce mem consumption of softmax backward
This commit is contained in:
commit bfeeddda81 (parent 465d41c429)
@@ -276,7 +276,8 @@ class SoftmaxFunction(torch.autograd.Function):
         ans_grad = ans_grad.to(torch.float32)
         ans = ans.to(torch.float32)
         x_grad = ans_grad * ans
-        x_grad = x_grad - ans * x_grad.sum(dim=ctx.dim, keepdim=True)
+        ans *= x_grad.sum(dim=ctx.dim, keepdim=True)
+        x_grad -= ans
         return x_grad, None
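The change rewrites the correction term of the softmax backward pass to run in place. For y = softmax(x) and upstream gradient g, the input gradient is x_grad = y * g - y * sum(y * g, dim). The old code materialized `ans * x_grad.sum(...)` as a fresh temporary before subtracting; the new code scales the saved `ans` buffer in place and subtracts it in place, so backward needs one fewer temporary the size of the input. Mutating `ans` here is safe because the saved output is not used again after backward. Below is a minimal sketch (the helper names `backward_old` and `backward_new` are illustrative, not from icefall) checking that the two forms agree numerically:

import torch

def backward_old(ans: torch.Tensor, ans_grad: torch.Tensor, dim: int) -> torch.Tensor:
    # Pre-commit version: `ans * x_grad.sum(...)` and the subtraction each
    # allocate a fresh tensor the size of x.
    x_grad = ans_grad * ans
    x_grad = x_grad - ans * x_grad.sum(dim=dim, keepdim=True)
    return x_grad

def backward_new(ans: torch.Tensor, ans_grad: torch.Tensor, dim: int) -> torch.Tensor:
    # Post-commit version: scale `ans` in place, then subtract in place.
    # The clone is only so this demo does not mutate the caller's tensor;
    # the real backward() consumes the saved `ans` directly.
    ans = ans.clone()
    x_grad = ans_grad * ans
    ans *= x_grad.sum(dim=dim, keepdim=True)  # broadcasts the keepdim sum
    x_grad -= ans
    return x_grad

x = torch.randn(4, 10)
ans = torch.softmax(x, dim=-1)
g = torch.randn_like(ans)
assert torch.allclose(backward_old(ans, g, -1), backward_new(ans, g, -1), atol=1e-6)

Note that in the actual commit the in-place ops reuse the `ans` tensor saved by forward(); the clone in the sketch exists only to keep the side-by-side comparison side-effect-free.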