Merge branch 'scaled_adam_exp273' into scaled_adam_exp277

# Conflicts: # egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
2022-11-04 12:41:02 +08:00 · 2022-11-04 12:41:02 +08:00 · 70300e34d3
commit 70300e34d3
parent eb915f170c f625810de1
1 changed files with 7 additions and 21 deletions
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@ -1434,10 +1434,12 @@ class ModifiedSEModule(nn.Module):
    """
    def __init__(self,
                 d_model: int,
-                 bottleneck_dim: int = 16):
+                 bottleneck_dim: int = 8):
        super().__init__()
-        self.squeeze_proj = nn.Linear(d_model, d_model,
+        self.squeeze_proj = nn.Linear(d_model, bottleneck_dim,
                                      bias=False)
+
+
        self.in_proj = nn.Linear(d_model, d_model,
                                 bias=False)

@ -1449,22 +1451,12 @@ class ModifiedSEModule(nn.Module):
        self.balancer = ActivationBalancer(
            d_model, channel_dim=-1,
            min_positive=0.05, max_positive=0.95,
+            min_abs=0.1,
            max_abs=50.0,
-            max_factor=0.01,
+            max_factor=0.02,
            min_prob=0.2,
        )
        self.activation = DoubleSwish()
-        self.to_bottleneck_proj = ScaledLinear(d_model, bottleneck_dim)
-
-        self.bottleneck_balancer = ActivationBalancer(
-            bottleneck_dim, channel_dim=-1,
-            min_positive=0.05, max_positive=0.95,
-            max_abs=5.0,
-            min_abs=0.5,
-            max_factor=0.01,
-            min_prob=0.2,
-        )
-        #self.bottleneck_norm = BasicNorm(bottleneck_dim)

        self.from_bottleneck_proj =  ScaledLinear(bottleneck_dim, d_model)

@ -1503,14 +1495,8 @@ class ModifiedSEModule(nn.Module):
        squeezed = self.squeeze_proj(squeezed)
        squeezed = self.balancer(squeezed)
        squeezed = self.activation(squeezed)
-        squeezed = self.to_bottleneck_proj(squeezed)
-        squeezed = self.bottleneck_balancer(squeezed)
-        #squeezed = self.bottleneck_norm(squeezed)
        squeezed = self.from_bottleneck_proj(squeezed)
-        if random.random() < 0.05:
-            # to stop a hopefully-unlikely failure mode where the inputs to the sigmoid
-            # get too large and the grads get mostly too small.
-            squeezed = penalize_abs_values_gt(squeezed, limit=10.0, penalty=1.0e-04)
+

        x = self.in_proj(x)
        x = x * squeezed