Simplify schedules of output balancers for nonlin_attention_module and attention_squeeze.

This commit is contained in:
Daniel Povey 2022-12-14 21:37:32 +08:00
parent 8231350ac4
commit 87df9f3215

View File

@@ -1406,7 +1406,7 @@ class AttentionSqueeze(nn.Module):
self.out_balancer = ActivationBalancer(
embed_dim, channel_dim=-1,
min_positive=0.3, max_positive=0.7,
-min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.01), (20000.0, 0.005)),
+min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.05)),
)
@@ -1541,7 +1541,7 @@ class NonlinAttentionModule(nn.Module):
self.balancer2 = ActivationBalancer(
channels, channel_dim=-1,
min_positive=0.3, max_positive=0.7,
-min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.01), (20000.0, 0.005)),
+min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.05)),
)