From 9cc5999829ef1441d99804204d1f61a796bc4948 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Fri, 4 Mar 2022 15:50:51 +0800
Subject: [PATCH] Fix duplicate Swish; replace norm+swish with swish+exp-scale
 in convolution module

---
 egs/librispeech/ASR/transducer_stateless/conformer.py | 9 +++------
 egs/librispeech/ASR/transducer_stateless/train.py     | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py
index ef6b4ac97..dc6b54399 100644
--- a/egs/librispeech/ASR/transducer_stateless/conformer.py
+++ b/egs/librispeech/ASR/transducer_stateless/conformer.py
@@ -163,7 +163,6 @@ class ConformerEncoderLayer(nn.Module):
 
         self.feed_forward_macaron = nn.Sequential(
             nn.Linear(d_model, dim_feedforward),
-            Swish(),
             ExpScaleSwish(dim_feedforward, speed=50.0),
             nn.Dropout(dropout),
             nn.Linear(dim_feedforward, d_model),
@@ -874,7 +873,9 @@ class ConvolutionModule(nn.Module):
             groups=channels,
             bias=bias,
         )
-        self.norm = nn.LayerNorm(channels)
+        # shape: (channels, 1), broadcasts with (batch, channels, time).
+        self.activation = ExpScaleSwish(channels, 1, speed=50.0)
+
         self.pointwise_conv2 = nn.Conv1d(
             channels,
             channels,
@@ -883,7 +884,6 @@ class ConvolutionModule(nn.Module):
             padding=0,
             bias=bias,
         )
-        self.activation = Swish()
 
     def forward(self, x: Tensor) -> Tensor:
         """Compute convolution module.
@@ -905,9 +905,6 @@ class ConvolutionModule(nn.Module):
         # 1D Depthwise Conv
         x = self.depthwise_conv(x)
         # x is (batch, channels, time)
-        x = x.permute(0, 2, 1)
-        x = self.norm(x)
-        x = x.permute(0, 2, 1)
 
         x = self.activation(x)
 
diff --git a/egs/librispeech/ASR/transducer_stateless/train.py b/egs/librispeech/ASR/transducer_stateless/train.py
index 980633ed6..973733d4b 100755
--- a/egs/librispeech/ASR/transducer_stateless/train.py
+++ b/egs/librispeech/ASR/transducer_stateless/train.py
@@ -110,7 +110,7 @@ def get_parser():
     parser.add_argument(
         "--exp-dir",
         type=str,
-        default="transducer_stateless/specaugmod_baseline_randcombine1_expscale4",
+        default="transducer_stateless/specaugmod_baseline_randcombine1_expscale5",
         help="""The experiment dir.
         It specifies the directory where all training related
         files, e.g., checkpoints, log, etc, are saved