Use 2 groups in whitening for NonlinAttentionModule; reduce whitening_limit from 40 to 20.

This commit is contained in:
Daniel Povey 2022-11-21 23:23:41 +08:00
parent b3b5e8b9b9
commit 71f118e725

View File

@@ -1424,8 +1424,8 @@ class NonlinAttentionModule(nn.Module):
)
# give it a high limit, because it is quite high-dimensional and is
# a projection of a lower-dimensional embedding.
-self.whiten = Whiten(num_groups=1,
-whitening_limit=40.0,
+self.whiten = Whiten(num_groups=2,
+whitening_limit=20.0,
prob=(0.025, 0.25),
grad_scale=0.01)