Reduce min_abs of zipformer balancer1; constraints on eps of Conv2dSubsampling.out_norm
parent 1797d0ec6d
commit 287bd120be
@@ -1390,6 +1390,7 @@ class LimitParamValue(torch.autograd.Function):
     @staticmethod
     def forward(ctx, x: Tensor, min: float, max: float):
         ctx.save_for_backward(x)
+        assert max >= min
         ctx.min = min
         ctx.max = max
         return x
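The new assert only sanity-checks the arguments; forward itself is an identity. The constraint takes effect in backward, which this hunk does not show: gradient components that would push a parameter further outside [min, max] get their sign flipped. A minimal self-contained sketch of that mechanism (the class name and exact sign logic here are an assumption, not necessarily what this file does):

import torch
from torch import Tensor

class LimitParamValueSketch(torch.autograd.Function):
    # Identity in forward; backward flips gradient components that would
    # push x further outside [min, max] under gradient descent.
    @staticmethod
    def forward(ctx, x: Tensor, min: float, max: float):
        ctx.save_for_backward(x)
        assert max >= min
        ctx.min = min
        ctx.max = max
        return x

    @staticmethod
    def backward(ctx, x_grad: Tensor):
        (x,) = ctx.saved_tensors
        # Where x is below min, a positive grad would push x lower still
        # (descent does x -= lr * grad), so flip it to negative.
        x_grad = x_grad * torch.where((x < ctx.min) & (x_grad > 0.0), -1.0, 1.0)
        # Where x is above max, symmetrically flip negative grads.
        x_grad = x_grad * torch.where((x > ctx.max) & (x_grad < 0.0), -1.0, 1.0)
        # No gradients w.r.t. the min/max floats.
        return x_grad, None, None

Usage would be x = LimitParamValueSketch.apply(x, -10.0, 2.0) on a parameter tensor; the assert added in this commit fails fast on reversed bounds instead of letting the two sign-flip branches contradict each other.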
@@ -459,7 +459,7 @@ class ZipformerEncoderLayer(nn.Module):
         self.balancer1 = Balancer(
             embed_dim, channel_dim=-1,
             min_positive=0.45, max_positive=0.55,
-            min_abs=1.0, max_abs=4.0,
+            min_abs=0.2, max_abs=4.0,
         )

         # balancer for output of NonlinAttentionModule
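For context on what the changed number means: Balancer constrains per-channel activation statistics through the gradient rather than by clamping values, and min_abs/max_abs bound the mean absolute value of each channel, so dropping min_abs from 1.0 to 0.2 greatly relaxes the lower bound on the magnitude of balancer1's output. A much-simplified sketch of the min_abs/max_abs idea (the class name, the 0.02 scale constant, and the correction formula are illustrative assumptions; the real Balancer in scaling.py also enforces min_positive/max_positive and controls how often corrections fire):

import torch
from torch import Tensor

class MeanAbsLimiter(torch.autograd.Function):
    # Identity in forward; backward adds a small term that nudges each
    # channel's mean |x| back into [min_abs, max_abs].
    @staticmethod
    def forward(ctx, x: Tensor, channel_dim: int,
                min_abs: float, max_abs: float):
        ctx.save_for_backward(x)
        ctx.channel_dim = channel_dim % x.ndim
        ctx.min_abs = min_abs
        ctx.max_abs = max_abs
        return x

    @staticmethod
    def backward(ctx, x_grad: Tensor):
        (x,) = ctx.saved_tensors
        dims = [d for d in range(x.ndim) if d != ctx.channel_dim]
        mean_abs = x.abs().mean(dim=dims, keepdim=True)
        # +1 where a channel is too quiet (grow |x|), -1 where too loud.
        direction = ((mean_abs < ctx.min_abs).to(x.dtype)
                     - (mean_abs > ctx.max_abs).to(x.dtype))
        # A descent step against sign(x) grows |x|; scale by the typical
        # grad size so the correction stays small next to the task gradient.
        correction = -0.02 * direction * x.sign() * x_grad.abs().mean()
        return x_grad + correction, None, None, None

Under this reading, after the commit a channel is only pushed upward once its average magnitude falls below 0.2 rather than 1.0, while max_abs=4.0 still caps it from above.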
@@ -1878,7 +1878,10 @@ class Conv2dSubsampling(nn.Module):
                                  prob=(0.025, 0.25),
                                  grad_scale=0.02)

-        self.out_norm = BasicNorm(out_channels)
+        # max_log_eps=0.0 is to prevent both eps and the output of self.out from
+        # getting large; there is an unnecessary degree of freedom.
+        self.out_norm = BasicNorm(out_channels, eps=1.0,
+                                  min_log_eps=-0.1, max_log_eps=0.0)
         self.dropout = Dropout2(dropout)
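What the new BasicNorm arguments do: BasicNorm divides by sqrt(mean(x^2) + eps) with eps learned in log space, so keeping log(eps) in [-0.1, 0.0] pins eps to roughly [0.905, 1.0]. Since scaling x by c while scaling eps by c^2 leaves the normalized output unchanged, an unconstrained eps and the output of self.out could drift large together; capping max_log_eps at 0.0 removes that redundant degree of freedom, as the in-diff comment says. A sketch of such a layer under these assumptions (the clamp-based constraint is a simplification; the real code may route log_eps through something like LimitParamValue instead):

import torch
from torch import Tensor, nn

class BasicNormSketch(nn.Module):
    # RMSNorm-like: y = x / sqrt(mean(x^2) + eps), with eps stored as a
    # learnable log so it stays positive, and constrained to a range.
    def __init__(self, num_channels: int, channel_dim: int = -1,
                 eps: float = 1.0,
                 min_log_eps: float = -0.1, max_log_eps: float = 0.0):
        super().__init__()
        self.num_channels = num_channels
        self.channel_dim = channel_dim
        self.min_log_eps = min_log_eps
        self.max_log_eps = max_log_eps
        # eps=1.0 gives log_eps=0.0, the top of the allowed range.
        self.log_eps = nn.Parameter(torch.tensor(float(eps)).log())

    def forward(self, x: Tensor) -> Tensor:
        assert x.shape[self.channel_dim] == self.num_channels
        log_eps = self.log_eps.clamp(self.min_log_eps, self.max_log_eps)
        scale = (x.pow(2).mean(dim=self.channel_dim, keepdim=True)
                 + log_eps.exp()) ** -0.5
        return x * scale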