From 4058d56c0d2a8beef67dd6653ecff3cba1b24b12 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 26 Nov 2022 14:04:41 +0800 Subject: [PATCH] Remove squeeze_excite from Conv2dSubsampling. --- .../pruned_transducer_stateless7/zipformer.py | 45 ------------------- 1 file changed, 45 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 0811406e3..6a4f08f6d 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -1606,48 +1606,6 @@ class ConvolutionModule(nn.Module): return x -class SqueezeExcite1d(nn.Module): - def __init__(self, - channels: int, - bottleneck_channels: int): - super().__init__() - self.to_bottleneck_proj = LinearWithAuxLoss(channels, - bottleneck_channels) - - self.bottleneck_activation = TanSwish() - self.from_bottleneck_proj = nn.Linear(bottleneck_channels, - channels) - - self.balancer = ActivationBalancer( - channels, channel_dim=-1, - min_abs=0.05, - max_abs=ScheduledFloat((0.0, 0.2), - (4000.0, 2.0), - (10000.0, 10.0), - default=1.0), - max_factor=0.02, - min_prob=0.1, - ) - self.activation = nn.Sigmoid() - - - - def forward(self, x: Tensor): - """ - x: a Tensor of shape (batch_size, T, channels). - Returns: something with the same shape as x. - """ - # project before mean, needed for LinearWithAuxLoss (or, at least, better) - bottleneck = self.to_bottleneck_proj(x) - # would replace this mean with cumsum for a causal model. - bottleneck = bottleneck.mean(dim=1, keepdim=True) - bottleneck = self.bottleneck_activation(bottleneck) - scale = self.from_bottleneck_proj(bottleneck) - scale = self.balancer(scale) - scale = self.activation(scale) - return x * scale - - class Conv2dSubsampling(nn.Module): """Convolutional 2D subsampling (to 1/2 length). @@ -1718,8 +1676,6 @@ class Conv2dSubsampling(nn.Module): ) out_height = (((in_channels - 1) // 2) - 1) // 2 - self.squeeze_excite = SqueezeExcite1d(out_height * layer3_channels, - bottleneck_channels) self.out = LinearWithAuxLoss(out_height * layer3_channels, out_channels, aux_grad_scale=ScheduledFloat((0.0, 0.2), (1000.0, 0.01))) @@ -1745,7 +1701,6 @@ class Conv2dSubsampling(nn.Module): x = x.transpose(1, 2).reshape(b, t, c * f) # now x: (N, ((T-1)//2 - 1))//2, out_height * layer3_channels)) - x = self.squeeze_excite(x) x = self.out(x) # Now x is of shape (N, ((T-1)//2 - 1))//2, odim) x = self.dropout(x)