Merge branch 'scaled_adam_exp708' into scaled_adam_exp709

This commit is contained in:
Daniel Povey 2022-12-14 22:56:09 +08:00
commit 9e79b296f2

View File

@@ -1406,7 +1406,7 @@ class AttentionSqueeze(nn.Module):
         self.out_balancer = ActivationBalancer(
             embed_dim, channel_dim=-1,
             min_positive=0.3, max_positive=0.7,
-            min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.05)),
+            min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.005)),
         )
@@ -1541,7 +1541,7 @@ class NonlinAttentionModule(nn.Module):
         self.balancer2 = ActivationBalancer(
             channels, channel_dim=-1,
             min_positive=0.3, max_positive=0.7,
-            min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.05)),
+            min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.005)),
         )