Mirror of https://github.com/k2-fsa/icefall.git (synced 2025-12-11 06:55:27 +00:00)
Apply limit on BasicNorm.eps more effectively using limit_param_value; add final norm to Zipformer.

commit 2e0f4de8ff
parent 049174722f
@@ -481,16 +481,10 @@ class BasicNorm(torch.nn.Module):

     def forward(self, x: Tensor) -> Tensor:
         assert x.shape[self.channel_dim] == self.num_channels
-        eps = self.eps
-        if self.training and random.random() < 0.25:
-            # with probability 0.25, in training mode, clamp eps between the min
-            # and max; this will encourage it to learn parameters within the
-            # allowed range by making parameters that are outside the allowed
-            # range noisy.
-
-            # gradients to allow the parameter to get back into the allowed
-            # region if it happens to exit it.
-            eps = eps.clamp(min=self.eps_min, max=self.eps_max)
+        eps = self.eps
+        if self.training:
+            eps = limit_param_value(self.eps, min=self.eps_min, max=self.eps_max)
+        eps = eps.exp()
         scales = (
             (torch.mean(x ** 2, dim=self.channel_dim, keepdim=True) + eps) /
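For context, `limit_param_value` lives in icefall's scaling utilities; the sketch below is a minimal reimplementation of the idea rather than the library's exact code. It assumes a custom autograd function that leaves the value untouched in the forward pass and, in the backward pass, masks gradient components that would push an out-of-range element further out of [min, max], so the optimizer pulls strays back inside instead of merely clipping them (the name `_LimitParamValue` and the exact masking rule are assumptions):

```python
import torch
from torch import Tensor


class _LimitParamValue(torch.autograd.Function):
    """Identity in forward; in backward, gradients of out-of-range elements
    are filtered so the parameter can only move back toward [min, max]."""

    @staticmethod
    def forward(ctx, x: Tensor, min_val: float, max_val: float) -> Tensor:
        ctx.save_for_backward(x)
        ctx.min_val = min_val
        ctx.max_val = max_val
        return x

    @staticmethod
    def backward(ctx, grad_output: Tensor):
        (x,) = ctx.saved_tensors
        grad = grad_output.clone()
        too_low = x < ctx.min_val
        too_high = x > ctx.max_val
        # Params move against the gradient, so a negative grad increases x.
        # Below the minimum: keep only negative grads (x may only grow).
        grad[too_low] = grad[too_low].clamp(max=0.0)
        # Above the maximum: keep only positive grads (x may only shrink).
        grad[too_high] = grad[too_high].clamp(min=0.0)
        return grad, None, None


def limit_param_value(x: Tensor, min: float, max: float) -> Tensor:
    # Hypothetical signature, matching only the call site visible in the diff.
    return _LimitParamValue.apply(x, min, max)
```

Compared with the removed `eps.clamp(...)`, which zeroes the value's dependence on the parameter once it leaves the range, this style keeps a useful gradient signal that steers the parameter back into the allowed region.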
@@ -216,6 +216,7 @@ class Zipformer(EncoderInterface):
             encoder_dim[-1],
             downsample=output_downsampling_factor,
             dropout=dropout)
+        self.norm = BasicNorm(num_channels=encoder_dim[-1])

     def _init_skip_modules(self):
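To make the added `self.norm` concrete, here is a minimal sketch of a BasicNorm-style module, reconstructed from the forward pass visible in the first hunk: a scalar eps learned in log space (hence the `.exp()`), used to normalize by the root-mean-square over the channel dimension. The constructor defaults are assumptions; the real icefall class carries the eps_min/eps_max limits discussed above.

```python
import torch
from torch import Tensor, nn


class BasicNorm(nn.Module):
    """Simplified sketch: divides x by its RMS over one dimension, with a
    learned eps stored in log space so that eps itself stays positive."""

    def __init__(self, num_channels: int, channel_dim: int = -1,
                 eps: float = 0.25) -> None:
        super().__init__()
        self.num_channels = num_channels
        self.channel_dim = channel_dim
        self.eps = nn.Parameter(torch.tensor(eps).log())  # log-space eps

    def forward(self, x: Tensor) -> Tensor:
        assert x.shape[self.channel_dim] == self.num_channels
        scales = (
            torch.mean(x ** 2, dim=self.channel_dim, keepdim=True)
            + self.eps.exp()
        ) ** -0.5
        return x * scales
```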
@@ -357,6 +358,7 @@ class Zipformer(EncoderInterface):
             lengths = (lengths + 1) // 2

         x = x.permute(1, 0, 2)  # (T, N, C) -> (N, T, C)
+        x = self.norm(x)

         return x, lengths
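Taken together, the second and third hunks add a final normalization of the Zipformer encoder output, applied after permuting to batch-first layout. A hedged usage sketch, assuming the BasicNorm sketch above and purely illustrative sizes (384 standing in for `encoder_dim[-1]`):

```python
import torch

norm = BasicNorm(num_channels=384)  # 384 = hypothetical encoder_dim[-1]
x = torch.randn(100, 8, 384)        # (T, N, C): frames, batch, channels
x = x.permute(1, 0, 2)              # (T, N, C) -> (N, T, C)
x = norm(x)                         # normalize over the channel dimension
print(x.shape)                      # torch.Size([8, 100, 384])
```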