Documentation fix.

Daniel Povey 2022-11-28 12:10:08 +08:00
parent c6d859dd05
commit 121f7e2a45


@@ -1314,11 +1314,11 @@ class AttentionSqueeze(nn.Module):
         self.bottleneck_activation = TanSwish() # in bottleneck
         self.activation = Identity() # for diagnostics
-        # the next two balancers are only to stop parameter-magnitude 'drift': we have
-        # too many degrees of freedom for the scales of the various activations.
-        # Make them run with very low probability, since only a small application of
-        # these balancers should be enough to stop such "drift"; and, for speed,
-        # put no limitation on the signs (so: min_positive=0, max_positive=1).
+        # the reason for the min_abs and max_abs limits on the next two
+        # balancers are only to stop parameter-magnitude 'drift': we have too
+        # many degrees of freedom for the scales of the various activations.
+        # Make them run with very low probability, since only a small
+        # application of these balancers should be enough to stop such "drift".
         self.scale_balancer = ActivationBalancer(
             embed_dim, channel_dim=-1,
             min_positive=0.2, max_positive=0.8,
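
For context on what the comment describes, here is a minimal, hypothetical sketch of an ActivationBalancer-style module. It is an illustrative assumption, not icefall's actual implementation: the parameter names (num_channels, channel_dim, min_positive, max_positive, min_abs, max_abs) follow the diff, while the class name ToyActivationBalancer, the prob default, and the gradient-hook logic are invented for illustration. The idea is that the module is the identity in the forward pass and, rarely, biases the backward gradient so that channels whose sign balance or mean magnitude has drifted out of range are nudged back.

import torch
import torch.nn as nn


class ToyActivationBalancer(nn.Module):
    """Hypothetical stand-in for icefall's ActivationBalancer (illustration
    only).  Identity in the forward pass; with small probability `prob` it
    registers a backward hook that biases the gradient so channels whose
    sign balance or mean magnitude is out of range are nudged back."""

    def __init__(self, num_channels: int, channel_dim: int = -1,
                 min_positive: float = 0.2, max_positive: float = 0.8,
                 min_abs: float = 0.2, max_abs: float = 10.0,
                 prob: float = 0.025):
        super().__init__()
        # num_channels mirrors the call signature in the diff; unused here.
        self.channel_dim = channel_dim
        self.min_positive, self.max_positive = min_positive, max_positive
        self.min_abs, self.max_abs = min_abs, max_abs
        self.prob = prob  # run rarely: a light touch is enough to stop drift

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not (self.training and x.requires_grad) or torch.rand(()) > self.prob:
            return x  # identity almost all of the time
        cdim = self.channel_dim % x.dim()
        stat_dims = [d for d in range(x.dim()) if d != cdim]
        with torch.no_grad():
            # per-channel statistics, kept broadcastable against x
            frac_pos = (x > 0).float().mean(dim=stat_dims, keepdim=True)
            mean_abs = x.abs().mean(dim=stat_dims, keepdim=True)
            # +1 where a statistic is below its floor (push it up),
            # -1 where it is above its ceiling (push it down), 0 in range
            pos_err = ((frac_pos < self.min_positive).float()
                       - (frac_pos > self.max_positive).float())
            abs_err = ((mean_abs < self.min_abs).float()
                       - (mean_abs > self.max_abs).float())
            sign_x = torch.sign(x)

        def _hook(grad: torch.Tensor) -> torch.Tensor:
            # SGD subtracts the gradient, so subtracting these terms moves x
            # toward the allowed ranges; scaling by the typical gradient size
            # keeps the correction a small perturbation.
            eps = 0.01 * grad.abs().mean()
            return grad - eps * (pos_err + abs_err * sign_x)

        x = x.clone()  # don't attach the hook to a tensor others may reuse
        x.register_hook(_hook)
        return x

With the values from the diff (min_positive=0.2, max_positive=0.8), each channel is steered to be positive between 20% and 80% of the time; because the module only acts during training and only on a small fraction of steps, it costs essentially nothing at inference.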