Make it use float16 if in amp but use clamp to avoid wrapping error

Daniel Povey 2022-10-23 21:13:23 +08:00
parent 85657946bb
commit d3876e32c4

@@ -922,8 +922,7 @@ class DoubleSwishFunction(torch.autograd.Function):
         if requires_grad:
             # discretize s. This should be expectation-preserving if we just divide the
             # result by 255.
-            s = s.to(torch.float)
-            s = ((s * 254.99) + torch.rand_like(s)).to(torch.uint8)
+            s = ((s * 255) + torch.rand_like(s)).clamp(max=255).to(torch.uint8)
             ctx.save_for_backward(s, y)
         return y
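
Below is a minimal sketch (not part of the commit) of the wrapping hazard the clamp guards against. Under AMP the sigmoid output s may be held in float16, and in float16 the sum s * 255 + torch.rand_like(s) can round up to exactly 256.0 when s is near 1.0; 256 does not fit in uint8, so the cast would wrap, whereas clamp(max=255) pins it to the top bucket first. The tensor values below are illustrative only.

import torch

# s at its maximum and a noise value chosen so the float16 sum rounds to 256.0.
s = torch.tensor([1.0], dtype=torch.float16)       # sigmoid output, in [0, 1]
noise = torch.tensor([0.97], dtype=torch.float16)  # stands in for torch.rand_like(s)

scaled = s * 255 + noise        # float16 rounding gives exactly 256.0 here
print(scaled)                   # tensor([256.], dtype=torch.float16)

# Casting 256.0 straight to uint8 would fall outside the representable range
# (the cast wraps or is undefined); clamping first keeps it at 255.
safe = scaled.clamp(max=255).to(torch.uint8)
print(safe)                     # tensor([255], dtype=torch.uint8)

# Dividing by 255 later recovers s in expectation, since the added uniform
# noise turns the truncating cast into unbiased stochastic rounding.
print(safe.to(torch.float16) / 255)

Compared with the previous workaround of scaling by 254.99, scaling by the full 255 keeps the whole [0, 255] quantization range usable and handles the overflow case explicitly with the clamp.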