diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index c3c7c32de..b7df5b0f3 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1434,10 +1434,12 @@ class ModifiedSEModule(nn.Module):
     """
     def __init__(self,
                  d_model: int,
-                 bottleneck_dim: int = 16):
+                 bottleneck_dim: int = 8):
         super().__init__()
 
-        self.squeeze_proj = nn.Linear(d_model, d_model,
+        self.squeeze_proj = nn.Linear(d_model, bottleneck_dim,
                                       bias=False)
+
+        self.in_proj = nn.Linear(d_model, d_model, bias=False)
 
@@ -1449,22 +1451,12 @@ class ModifiedSEModule(nn.Module):
         self.balancer = ActivationBalancer(
             d_model, channel_dim=-1,
             min_positive=0.05, max_positive=0.95,
-            max_factor=0.01,
+            min_abs=0.1, max_abs=50.0,
+            max_factor=0.02,
             min_prob=0.2,
         )
         self.activation = DoubleSwish()
 
-        self.to_bottleneck_proj = ScaledLinear(d_model, bottleneck_dim)
-
-        self.bottleneck_balancer = ActivationBalancer(
-            bottleneck_dim, channel_dim=-1,
-            min_positive=0.05, max_positive=0.95,
-            max_abs=5.0,
-            min_abs=0.5,
-            max_factor=0.01,
-            min_prob=0.2,
-        )
-        #self.bottleneck_norm = BasicNorm(bottleneck_dim)
 
         self.from_bottleneck_proj = ScaledLinear(bottleneck_dim, d_model)
 
@@ -1503,14 +1495,8 @@ class ModifiedSEModule(nn.Module):
         squeezed = self.squeeze_proj(squeezed)
         squeezed = self.balancer(squeezed)
         squeezed = self.activation(squeezed)
-        squeezed = self.to_bottleneck_proj(squeezed)
-        squeezed = self.bottleneck_balancer(squeezed)
-        #squeezed = self.bottleneck_norm(squeezed)
         squeezed = self.from_bottleneck_proj(squeezed)
-        if random.random() < 0.05:
-            # to stop a hopefully-unlikely failure mode where the inputs to the sigmoid
-            # get too large and the grads get mostly too small.
-            squeezed = penalize_abs_values_gt(squeezed, limit=10.0, penalty=1.0e-04)
+        x = self.in_proj(x)
         x = x * squeezed
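
As a rough illustration (not part of the patch itself), here is a minimal self-contained sketch of the squeeze-and-excite path after this change: pool the input, project to a small bottleneck, apply a nonlinearity, project back to `d_model`, and use the result to gate a separately projected copy of the input via the new `in_proj`. It assumes the pooled `squeezed` vector is a mean over the time axis; plain `nn.Linear` and `nn.SiLU` stand in for icefall's `ScaledLinear` and `DoubleSwish`, and the `ActivationBalancer` (a training-time regularizer) is omitted.

```python
import torch
import torch.nn as nn


class SqueezeExciteSketch(nn.Module):
    """Sketch of the post-patch SE path; names and pooling are assumptions,
    not the exact icefall implementation."""

    def __init__(self, d_model: int, bottleneck_dim: int = 8):
        super().__init__()
        # d_model -> bottleneck_dim (was d_model -> d_model before the patch)
        self.squeeze_proj = nn.Linear(d_model, bottleneck_dim, bias=False)
        # new projection applied to the input before the gating multiply
        self.in_proj = nn.Linear(d_model, d_model, bias=False)
        self.activation = nn.SiLU()  # stand-in for DoubleSwish
        # bottleneck_dim -> d_model (plain Linear standing in for ScaledLinear)
        self.from_bottleneck_proj = nn.Linear(bottleneck_dim, d_model)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (seq_len, batch, d_model)
        squeezed = x.mean(dim=0)                        # assumed squeeze: mean over time
        squeezed = self.squeeze_proj(squeezed)          # d_model -> bottleneck_dim
        squeezed = self.activation(squeezed)
        squeezed = self.from_bottleneck_proj(squeezed)  # bottleneck_dim -> d_model
        x = self.in_proj(x)                             # new input projection
        return x * squeezed                             # channel-wise gating, broadcast over time


if __name__ == "__main__":
    m = SqueezeExciteSketch(d_model=384, bottleneck_dim=8)
    y = m(torch.randn(100, 4, 384))
    print(y.shape)  # torch.Size([100, 4, 384])
```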