From a37d98463aeaf0fd9370128cd0f03663bb3aaab1 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 6 Mar 2022 11:55:02 +0800 Subject: [PATCH] Restore ConvolutionModule to state before changes; change all Swish,Swish(Swish) to SwishOffset. --- .../ASR/conformer_ctc/subsampling.py | 5 ++--- .../ASR/transducer_stateless/conformer.py | 22 ++++++++++++++----- .../ASR/transducer_stateless/train.py | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/egs/librispeech/ASR/conformer_ctc/subsampling.py b/egs/librispeech/ASR/conformer_ctc/subsampling.py index daf8fd251..1fe1265fa 100644 --- a/egs/librispeech/ASR/conformer_ctc/subsampling.py +++ b/egs/librispeech/ASR/conformer_ctc/subsampling.py @@ -212,9 +212,8 @@ class ExpScale(torch.nn.Module): def _exp_scale_swish(x: Tensor, scale: Tensor, speed: float) -> Tensor: - # double-swish! - x = (x * torch.sigmoid(x)) - x = (x * torch.sigmoid(x)) + # double-swish, implemented/approximated as offset-swish + x = (x * torch.sigmoid(x - 1.0)) x = x * (scale * speed).exp() return x diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py index 5adb7ca4e..62d9f382f 100644 --- a/egs/librispeech/ASR/transducer_stateless/conformer.py +++ b/egs/librispeech/ASR/transducer_stateless/conformer.py @@ -877,10 +877,10 @@ class ConvolutionModule(nn.Module): groups=channels, bias=bias, ) - self.balancer = DerivBalancer(channel_dim=1, threshold=0.05, - max_factor=0.025) - # shape: (channels, 1), broadcasts with (batch, channel, time). - self.activation = ExpScaleSwish(channels, 1, speed=20.0) + + self.norm = nn.LayerNorm(channels) + # Swish with a fixed sigmoid offset; takes no per-channel shape argument. 
+ self.activation = SwishOffset() self.pointwise_conv2 = nn.Conv1d( channels, @@ -911,8 +911,10 @@ class ConvolutionModule(nn.Module): # 1D Depthwise Conv x = self.depthwise_conv(x) # x is (batch, channels, time) + x = x.permute(0, 2, 1) + x = self.norm(x) + x = x.permute(0, 2, 1) - x = self.balancer(x) x = self.activation(x) x = self.pointwise_conv2(x) # (batch, channel, time) @@ -927,6 +929,16 @@ class Swish(torch.nn.Module): """Return Swich activation function.""" return x * torch.sigmoid(x) +class SwishOffset(torch.nn.Module): + """Construct a SwishOffset object.""" + def __init__(self, offset: float = -1.0) -> None: + super(SwishOffset, self).__init__() + self.offset = offset + + def forward(self, x: Tensor) -> Tensor: + """Return the offset Swish activation, x * sigmoid(x + offset).""" + return x * torch.sigmoid(x + self.offset) + def identity(x): return x diff --git a/egs/librispeech/ASR/transducer_stateless/train.py b/egs/librispeech/ASR/transducer_stateless/train.py index a3eca26c9..16746147f 100755 --- a/egs/librispeech/ASR/transducer_stateless/train.py +++ b/egs/librispeech/ASR/transducer_stateless/train.py @@ -110,7 +110,7 @@ def get_parser(): parser.add_argument( "--exp-dir", type=str, - default="transducer_stateless/specaugmod_baseline_randcombine1_expscale5_brelu2swish2", + default="transducer_stateless/specaugmod_baseline_randcombine1_expscale3_brelu2swish2", help="""The experiment dir. It specifies the directory where all training related files, e.g., checkpoints, log, etc, are saved