Change activation in bottleneck to Tanh.

This commit is contained in:
Daniel Povey 2022-11-22 17:32:02 +08:00
parent 8dfeaa5f92
commit 19683aa516

View File

@ -1306,7 +1306,7 @@ class AttentionSqueeze(nn.Module):
max_factor=0.02,
min_prob=0.1,
)
-        self.bottleneck_activation = DoubleSwish() # in bottleneck
+        self.bottleneck_activation = nn.Tanh() # in bottleneck
self.activation = Identity() # for diagnostics
# the next two balancers are only to stop parameter-magnitude 'drift': we have