Try to save memory in autocast mode.

2022-11-26 14:25:27 +08:00 · 2022-11-26 14:25:27 +08:00 · d1ee1f2d98
commit d1ee1f2d98
parent 7b5c0382f9
1 changed files with 4 additions and 0 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
@ -396,8 +396,12 @@ class LinearWithAuxLossFunction(torch.autograd.Function):
        In the backward pass it will include an auxiliary loss based on predicting x from
        matmul(y, weight).
        """
+        if torch.is_autocast_enabled():
+            x = x.to(torch.float16)
        ctx.save_for_backward(x, weight, alpha)
        ctx.aux_grad_scale = aux_grad_scale
+        if torch.is_autocast_enabled():
+            weight = weight.to(torch.float16)
        return torch.matmul(x, weight.t())