Add -0.05 to DoubleSwish.

This commit is contained in:
Daniel Povey 2022-12-02 15:17:41 +08:00
parent 4afd95d822
commit f0f204552d

View File

@@ -1118,7 +1118,7 @@ class DoubleSwishFunction(torch.autograd.Function):
             assert d_scaled.max() < 256.0
             d_int = d_scaled.to(torch.uint8)
             ctx.save_for_backward(d_int)
-        y = y + alpha * x + beta * x.clamp(min=-x_limit, max=x_limit)
+        y = y + alpha * x + beta * x.clamp(min=-x_limit, max=x_limit) + -0.05
         if x.dtype == torch.float16 or torch.is_autocast_enabled():
             y = y.to(torch.float16)
         return y