Cast grad_scale in whiten to float (#1663)

* cast grad_scale in whiten to float

* fix cast in zipformer_lora
Teo Wen Shen 2024-07-11 16:12:30 +09:00 committed by GitHub
parent d65187ec52
commit 19048e155b
2 changed files with 4 additions and 4 deletions


@@ -1033,7 +1033,7 @@ class WhiteningPenaltyFunction(torch.autograd.Function):
                         w.prob = w.max_prob
                     metric.backward()
                     penalty_grad = x_detached.grad
-                    scale = w.grad_scale * (
+                    scale = float(w.grad_scale) * (
                         x_grad.to(torch.float32).norm()
                         / (penalty_grad.norm() + 1.0e-20)
                     )
@@ -1075,7 +1075,7 @@ class Whiten(nn.Module):
         super(Whiten, self).__init__()
         assert num_groups >= 1
         assert float(whitening_limit) >= 1
-        assert grad_scale >= 0
+        assert float(grad_scale) >= 0
         self.num_groups = num_groups
         self.whitening_limit = whitening_limit
         self.grad_scale = grad_scale
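
Both hunks in this file make the same kind of change: grad_scale is forced to a Python float before it is used, whether in the penalty-scale multiply or in the constructor assert. Below is a minimal sketch of the scale computation, assuming grad_scale may arrive as a plain float, a 0-dim tensor, or a schedule-like object that defines __float__; penalty_scale is a hypothetical helper, not a function from the icefall code.

import torch

def penalty_scale(grad_scale, x_grad: torch.Tensor, penalty_grad: torch.Tensor) -> torch.Tensor:
    # float(...) normalizes grad_scale (float, 0-dim tensor, or any object
    # defining __float__) to a plain Python float, so the result is a float32
    # scalar tensor driven only by the two gradient norms.
    return float(grad_scale) * (
        x_grad.to(torch.float32).norm() / (penalty_grad.norm() + 1.0e-20)
    )

# Works the same whichever form grad_scale takes:
x_grad = torch.randn(4, 8, dtype=torch.float16)
penalty_grad = torch.randn(4, 8, dtype=torch.float32)
print(penalty_scale(0.02, x_grad, penalty_grad))
print(penalty_scale(torch.tensor(0.02), x_grad, penalty_grad))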


@@ -1137,7 +1137,7 @@ class WhiteningPenaltyFunction(torch.autograd.Function):
                         w.prob = w.max_prob
                     metric.backward()
                     penalty_grad = x_detached.grad
-                    scale = w.grad_scale * (
+                    scale = float(w.grad_scale) * (
                         x_grad.to(torch.float32).norm()
                         / (penalty_grad.norm() + 1.0e-20)
                     )
@@ -1179,7 +1179,7 @@ class Whiten(nn.Module):
         super(Whiten, self).__init__()
         assert num_groups >= 1
         assert float(whitening_limit) >= 1
-        assert grad_scale >= 0
+        assert float(grad_scale) >= 0
         self.num_groups = num_groups
         self.whitening_limit = whitening_limit
         self.grad_scale = grad_scale
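
The second file (the zipformer_lora copy, per the commit message) gets the identical treatment. For the constructor change, here is a toy illustration of why the cast matters, assuming grad_scale can be a schedule-like object that defines __float__ but no comparison operators; WarmupValue is hypothetical and not a class from icefall.

class WarmupValue:
    """Hypothetical stand-in for a scheduled value: convertible to float,
    but with no rich-comparison methods defined."""

    def __init__(self, value: float):
        self.value = value

    def __float__(self) -> float:
        return self.value


grad_scale = WarmupValue(0.02)
# `assert grad_scale >= 0` would raise TypeError ('>=' not supported between
# instances of 'WarmupValue' and 'int'); casting first keeps the check valid
# for plain floats, 0-dim tensors, and schedule objects alike.
assert float(grad_scale) >= 0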