Merge branch 'scaled_adam_exp703' into scaled_adam_exp709

Daniel Povey 2022-12-14 21:41:32 +08:00
commit cec2162a17

@@ -1297,7 +1297,7 @@ class SelfAttention(nn.Module):
                                    initial_scale=0.05)
         self.whiten = Whiten(num_groups=1,
-                             whitening_limit=_whitening_schedule(7.5),
+                             whitening_limit=_whitening_schedule(7.5, ratio=3.0),
                              prob=(0.025, 0.25),
                              grad_scale=0.01)
@@ -1534,7 +1534,7 @@ class NonlinAttentionModule(nn.Module):
                              grad_scale=0.01)
         self.whiten2 = Whiten(num_groups=1,
-                             whitening_limit=_whitening_schedule(5.0),
+                             whitening_limit=_whitening_schedule(5.0, ratio=3.0),
                              prob=(0.025, 0.25),
                              grad_scale=0.01)
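
For context, both hunks pass an explicit ratio argument to the whitening-limit schedule used by the Whiten modules. Below is a minimal, self-contained sketch (not the actual icefall code) of how a schedule with a ratio parameter could behave: the limit starts at the given value and ramps toward ratio times that value as training progresses. The function name whitening_schedule_sketch, the 20000-batch ramp length, and the linear interpolation are assumptions for illustration only; the real _whitening_schedule in zipformer.py may differ.

# Hypothetical sketch, not the icefall implementation: a whitening-limit
# schedule that starts at `x` and ramps linearly to `ratio * x` over
# `final_batch` training batches (ramp length is an assumed value).
def whitening_schedule_sketch(x: float, ratio: float = 2.0,
                              final_batch: float = 20000.0):
    def value_at(batch_count: float) -> float:
        # Linear ramp from x (at batch 0) to ratio * x (at final_batch).
        frac = min(max(batch_count / final_batch, 0.0), 1.0)
        return x * (1.0 + frac * (ratio - 1.0))
    return value_at

# Example: with ratio=3.0 the limit grows from 7.5 toward 22.5.
limit = whitening_schedule_sketch(7.5, ratio=3.0)
print(limit(0.0), limit(10000.0), limit(20000.0))  # 7.5 15.0 22.5

Under this reading, changing the call from _whitening_schedule(7.5) to _whitening_schedule(7.5, ratio=3.0) makes the final whitening limit explicit rather than relying on the function's default ratio.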