Apply limit on BasicNorm.eps more effectively using limit_param_value; add final norm to Zipformer.

Daniel Povey 2022-12-23 15:59:51 +08:00
parent 049174722f
commit 2e0f4de8ff
2 changed files with 5 additions and 9 deletions

@@ -481,16 +481,10 @@ class BasicNorm(torch.nn.Module):
     def forward(self, x: Tensor) -> Tensor:
         assert x.shape[self.channel_dim] == self.num_channels
-        eps = self.eps
-        if self.training and random.random() < 0.25:
-            # with probability 0.25, in training mode, clamp eps between the min
-            # and max; this will encourage it to learn parameters within the
-            # allowed range by making parameters that are outside the allowed
-            # range noisy.
-            # gradients to allow the parameter to get back into the allowed
-            # region if it happens to exit it.
-            eps = eps.clamp(min=self.eps_min, max=self.eps_max)
+        eps = self.eps
+        if self.training:
+            eps = limit_param_value(self.eps, min=self.eps_min, max=self.eps_max)
         eps = eps.exp()
         scales = (
             (torch.mean(x ** 2, dim=self.channel_dim, keepdim=True) + eps) /
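The hunk above replaces the occasional hard clamp on eps with a call to limit_param_value, whose definition is not part of this commit. The sketch below is a hypothetical stand-in that illustrates the usual idea behind such a helper: leave the value untouched in the forward pass, but flip any gradient component that would push an already out-of-range element further outside [min, max], so the parameter is steered back into range instead of having its gradient cut off by a clamp. The class name _LimitValue is made up for illustration, and the real helper in icefall's scaling.py may differ (for example, it may apply the constraint only with some probability).

import torch
from torch import Tensor


class _LimitValue(torch.autograd.Function):
    # Identity in the forward pass; in the backward pass, flips any gradient
    # component that would push an element already outside [min_val, max_val]
    # even further out, so the optimizer steers it back into range.
    @staticmethod
    def forward(ctx, x: Tensor, min_val: float, max_val: float) -> Tensor:
        ctx.save_for_backward(x)
        ctx.min_val = min_val
        ctx.max_val = max_val
        return x

    @staticmethod
    def backward(ctx, grad: Tensor):
        (x,) = ctx.saved_tensors
        # With SGD, a positive gradient decreases x.  Flip the gradient where
        # the update would move an out-of-range element further out of range.
        flip = ((x < ctx.min_val) & (grad > 0)) | ((x > ctx.max_val) & (grad < 0))
        grad = torch.where(flip, -grad, grad)
        return grad, None, None


def limit_param_value(x: Tensor, min: float, max: float) -> Tensor:
    # Hypothetical stand-in matching the call site in the diff above.
    return _LimitValue.apply(x, min, max)

Under this sketch, the call limit_param_value(self.eps, min=self.eps_min, max=self.eps_max) costs nothing in the forward pass and only intervenes when the log-space eps drifts outside its allowed range.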


@@ -216,6 +216,7 @@ class Zipformer(EncoderInterface):
             encoder_dim[-1],
             downsample=output_downsampling_factor,
             dropout=dropout)
+        self.norm = BasicNorm(num_channels=encoder_dim[-1])

     def _init_skip_modules(self):
@@ -357,6 +358,7 @@ class Zipformer(EncoderInterface):
         lengths = (lengths + 1) // 2

         x = x.permute(1, 0, 2)  # (T, N, C) ->(N, T, C)
+        x = self.norm(x)
         return x, lengths
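The second file gains a final self.norm = BasicNorm(...) applied to the encoder output just before it is returned. For context, the sketch below is a minimal BasicNorm-like layer consistent with the forward() shown in the first hunk: no learned scale or bias, just division by sqrt(mean(x**2) + eps) over the channel dimension, with eps stored as a learned log-space parameter (hence the .exp()). It is an illustrative reimplementation, not the icefall class; the eps_min/eps_max limits and the real constructor defaults are omitted.

import torch
from torch import Tensor, nn


class BasicNormSketch(nn.Module):
    # Minimal BasicNorm-style layer: RMS normalization with no learned
    # scale/bias; eps is a learned parameter kept in log space so that it
    # stays positive after .exp().
    def __init__(self, num_channels: int, channel_dim: int = -1, eps: float = 0.25):
        super().__init__()
        self.num_channels = num_channels
        self.channel_dim = channel_dim
        self.eps = nn.Parameter(torch.tensor(eps).log())

    def forward(self, x: Tensor) -> Tensor:
        assert x.shape[self.channel_dim] == self.num_channels
        scales = (
            torch.mean(x ** 2, dim=self.channel_dim, keepdim=True) + self.eps.exp()
        ) ** -0.5
        return x * scales

Applied as x = self.norm(x) on the (N, T, C) output, a layer of this kind normalizes each frame's encoder_dim[-1]-dimensional feature vector to roughly unit RMS before the encoder returns it.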