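Try to save memory in autocast mode: when autocast is enabled, cast x to float16 before saving it for the backward pass, and cast weight to float16 for the forward matmul.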
This commit is contained in:
parent
7b5c0382f9
commit
d1ee1f2d98
@ -396,8 +396,12 @@ class LinearWithAuxLossFunction(torch.autograd.Function):
|
|||||||
In the backward pass it will include an auxiliary loss based on predicting x from
|
In the backward pass it will include an auxiliary loss based on predicting x from
|
||||||
matmul(y, weight).
|
matmul(y, weight).
|
||||||
"""
|
"""
|
||||||
|
if torch.is_autocast_enabled():
|
||||||
|
x = x.to(torch.float16)
|
||||||
ctx.save_for_backward(x, weight, alpha)
|
ctx.save_for_backward(x, weight, alpha)
|
||||||
ctx.aux_grad_scale = aux_grad_scale
|
ctx.aux_grad_scale = aux_grad_scale
|
||||||
|
if torch.is_autocast_enabled():
|
||||||
|
weight = weight.to(torch.float16)
|
||||||
return torch.matmul(x, weight.t())
|
return torch.matmul(x, weight.t())
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user