diff --git a/egs/librispeech/ASR/conformer_ctc/subsampling.py b/egs/librispeech/ASR/conformer_ctc/subsampling.py
index 1e31c0a20..7c2b1ec04 100644
--- a/egs/librispeech/ASR/conformer_ctc/subsampling.py
+++ b/egs/librispeech/ASR/conformer_ctc/subsampling.py
@@ -527,7 +527,7 @@ class DerivBalancer(torch.nn.Module):
     """
     def __init__(self, channel_dim: int,
                  min_positive: float = 0.05,
-                 max_positive: float = 0.95,
+                 max_positive: float = 1.0,
                  max_factor: float = 0.01,
                  min_abs: float = 0.2,
                  max_abs: float = 100.0):
diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py
index b68aced9f..54729652b 100644
--- a/egs/librispeech/ASR/transducer_stateless/conformer.py
+++ b/egs/librispeech/ASR/transducer_stateless/conformer.py
@@ -862,8 +862,7 @@ class ConvolutionModule(nn.Module):
         # constrain the rms values to a reasonable range via a constraint of max_abs=10.0,
         # it will be in a better position to start learning something, i.e. to latch onto
         # the correct range.
-        self.deriv_balancer = DerivBalancer(channel_dim=1, max_abs=10.0,
-                                            min_positive=0.0, max_positive=1.0)
+        self.deriv_balancer1 = DerivBalancer(channel_dim=1, max_abs=10.0)
 
         self.depthwise_conv = ScaledConv1d(
             channels,
@@ -875,7 +874,9 @@ class ConvolutionModule(nn.Module):
             bias=bias,
         )
 
-         # shape: (channels, 1), broadcasts with (batch, channel, time).
+        self.deriv_balancer2 = DerivBalancer(channel_dim=1)
+
+         # Shape: (channels, 1), broadcasts with (batch, channel, time).
         self.activation = SwishOffset()
 
         self.pointwise_conv2 = ScaledConv1d(
@@ -904,12 +905,13 @@ class ConvolutionModule(nn.Module):
         # GLU mechanism
         x = self.pointwise_conv1(x)  # (batch, 2*channels, time)
 
-        x = self.deriv_balancer(x)
+        x = self.deriv_balancer1(x)
         x = nn.functional.glu(x, dim=1)  # (batch, channels, time)
 
         # 1D Depthwise Conv
         x = self.depthwise_conv(x)
 
+        x = self.deriv_balancer2(x)
         x = self.activation(x)
 
         x = self.pointwise_conv2(x)  # (batch, channel, time)
diff --git a/egs/librispeech/ASR/transducer_stateless/train.py b/egs/librispeech/ASR/transducer_stateless/train.py
index 6c318c242..6408290b4 100755
--- a/egs/librispeech/ASR/transducer_stateless/train.py
+++ b/egs/librispeech/ASR/transducer_stateless/train.py
@@ -110,7 +110,7 @@ def get_parser():
     parser.add_argument(
         "--exp-dir",
         type=str,
-        default="transducer_stateless/randcombine1_expscale3_rework2c_maxabs1000_maxp0.95_noexp_convderiv2warmup",
+        default="transducer_stateless/randcombine1_expscale3_rework2c_maxabs1000_maxp0.95_noexp_convderiv3warmup",
         help="""The experiment dir.
         It specifies the directory where all training related
         files, e.g., checkpoints, log, etc, are saved