diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/export.py b/egs/librispeech/ASR/pruned_transducer_stateless/export.py
index 7d2a07817..a4210831c 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/export.py
@@ -116,8 +116,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -159,6 +157,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py
index 257936b59..840d847cb 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/conformer.py
@@ -112,10 +112,13 @@ class Conformer(EncoderInterface):
         x, pos_emb = self.encoder_pos(x)
         x = x.permute(1, 0, 2)  # (N, T, C) -> (T, N, C)
 
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            # Caution: We assume the subsampling factor is 4!
-            lengths = ((x_lens - 1) // 2 - 1) // 2
+        # Caution: We assume the subsampling factor is 4!
+
+        #  lengths = ((x_lens - 1) // 2 - 1) // 2 # issues a warning
+        #
+        # Note: rounding_mode in torch.div() is available only in torch >= 1.8.0
+        lengths = (((x_lens - 1) >> 1) - 1) >> 1
+
         assert x.size(0) == lengths.max().item()
         mask = make_pad_mask(lengths)
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/export.py b/egs/librispeech/ASR/pruned_transducer_stateless2/export.py
index 6b3a7a9ff..cff9c7377 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless2/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/export.py
@@ -131,8 +131,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -191,6 +189,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py
index f89d2963e..5ee4bab98 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless2/scaling.py
@@ -212,7 +212,10 @@ class ScaledLinear(nn.Linear):
         return self.weight * self.weight_scale.exp()
 
     def get_bias(self):
-        return None if self.bias is None else self.bias * self.bias_scale.exp()
+        if self.bias is None or self.bias_scale is None:
+            return None
+
+        return self.bias * self.bias_scale.exp()
 
     def forward(self, input: Tensor) -> Tensor:
         return torch.nn.functional.linear(
@@ -255,7 +258,11 @@ class ScaledConv1d(nn.Conv1d):
         return self.weight * self.weight_scale.exp()
 
     def get_bias(self):
-        return None if self.bias is None else self.bias * self.bias_scale.exp()
+        bias = self.bias
+        bias_scale = self.bias_scale
+        if bias is None or bias_scale is None:
+            return None
+        return bias * bias_scale.exp()
 
     def forward(self, input: Tensor) -> Tensor:
         F = torch.nn.functional
@@ -269,7 +276,7 @@ class ScaledConv1d(nn.Conv1d):
                 self.get_weight(),
                 self.get_bias(),
                 self.stride,
-                _single(0),
+                (0,),
                 self.dilation,
                 self.groups,
             )
@@ -319,7 +326,12 @@ class ScaledConv2d(nn.Conv2d):
         return self.weight * self.weight_scale.exp()
 
     def get_bias(self):
-        return None if self.bias is None else self.bias * self.bias_scale.exp()
+        # see https://github.com/pytorch/pytorch/issues/24135
+        bias = self.bias
+        bias_scale = self.bias_scale
+        if bias is None or bias_scale is None:
+            return None
+        return bias * bias_scale.exp()
 
     def _conv_forward(self, input, weight):
         F = torch.nn.functional
@@ -333,7 +345,7 @@ class ScaledConv2d(nn.Conv2d):
                 weight,
                 self.get_bias(),
                 self.stride,
-                _pair(0),
+                (0, 0),
                 self.dilation,
                 self.groups,
             )
@@ -398,6 +410,9 @@ class ActivationBalancer(torch.nn.Module):
         self.max_abs = max_abs
 
     def forward(self, x: Tensor) -> Tensor:
+        if torch.jit.is_scripting():
+            return x
+
         return ActivationBalancerFunction.apply(
             x,
             self.channel_dim,
@@ -444,6 +459,8 @@ class DoubleSwish(torch.nn.Module):
         """Return double-swish activation function which is an approximation to Swish(Swish(x)),
         that we approximate closely with x * sigmoid(x-1).
         """
+        if torch.jit.is_scripting():
+            return x * torch.sigmoid(x - 1.0)
         return DoubleSwishFunction.apply(x)
 
 
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/export.py b/egs/librispeech/ASR/pruned_transducer_stateless3/export.py
index 0cdb0b957..e674fb360 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless3/export.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/export.py
@@ -132,8 +132,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -192,6 +190,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/test_scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless3/test_scaling.py
new file mode 100755
index 000000000..e9dfe6d5e
--- /dev/null
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/test_scaling.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# Copyright    2022  Xiaomi Corp.        (authors: Fangjun Kuang)
+#
+# See ../../../../LICENSE for clarification regarding multiple authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+To run this file, do:
+
+    cd icefall/egs/librispeech/ASR
+    python ./pruned_transducer_stateless3/test_scaling.py
+"""
+
+import torch
+from scaling import ActivationBalancer, ScaledConv1d, ScaledConv2d
+
+
+def test_scaled_conv1d():
+    for bias in [True, False]:
+        conv1d = ScaledConv1d(
+            3,
+            6,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            bias=bias,
+        )
+        torch.jit.script(conv1d)
+
+
+def test_scaled_conv2d():
+    for bias in [True, False]:
+        conv2d = ScaledConv2d(
+            in_channels=1,
+            out_channels=3,
+            kernel_size=3,
+            padding=1,
+            bias=bias,
+        )
+        torch.jit.script(conv2d)
+
+
+def test_activation_balancer():
+    act = ActivationBalancer(
+        channel_dim=1, max_abs=10.0, min_positive=0.05, max_positive=1.0
+    )
+    torch.jit.script(act)
+
+
+def main():
+    test_scaled_conv1d()
+    test_scaled_conv2d()
+    test_activation_balancer()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py
index 488c82386..51f13b73f 100644
--- a/egs/librispeech/ASR/transducer_stateless/conformer.py
+++ b/egs/librispeech/ASR/transducer_stateless/conformer.py
@@ -109,10 +109,12 @@ class Conformer(Transformer):
         x, pos_emb = self.encoder_pos(x)
         x = x.permute(1, 0, 2)  # (N, T, C) -> (T, N, C)
 
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            # Caution: We assume the subsampling factor is 4!
-            lengths = ((x_lens - 1) // 2 - 1) // 2
+        # Caution: We assume the subsampling factor is 4!
+
+        #  lengths = ((x_lens - 1) // 2 - 1) // 2 # issues a warning
+        #
+        # Note: rounding_mode in torch.div() is available only in torch >= 1.8.0
+        lengths = (((x_lens - 1) >> 1) - 1) >> 1
 
         assert x.size(0) == lengths.max().item()
         mask = make_pad_mask(lengths)
diff --git a/egs/librispeech/ASR/transducer_stateless/export.py b/egs/librispeech/ASR/transducer_stateless/export.py
index 5687260df..8bd0bdea1 100755
--- a/egs/librispeech/ASR/transducer_stateless/export.py
+++ b/egs/librispeech/ASR/transducer_stateless/export.py
@@ -183,8 +183,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -226,6 +224,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
diff --git a/egs/librispeech/ASR/transducer_stateless2/export.py b/egs/librispeech/ASR/transducer_stateless2/export.py
index 7a68f69ff..57c1a6094 100755
--- a/egs/librispeech/ASR/transducer_stateless2/export.py
+++ b/egs/librispeech/ASR/transducer_stateless2/export.py
@@ -115,8 +115,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -158,6 +156,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"
diff --git a/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py
index 7d14d011d..b6b69d932 100755
--- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py
+++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/export.py
@@ -184,8 +184,6 @@ def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
 
-    assert args.jit is False, "Support torchscript will be added later"
-
     params = get_params()
     params.update(vars(args))
 
@@ -229,6 +227,11 @@ def main():
     model.eval()
 
     if params.jit:
+        # We won't use the forward() method of the model in C++, so just ignore
+        # it here.
+        # Otherwise, one of its arguments is a ragged tensor and is not
+        # torch scriptable.
+        model.__class__.forward = torch.jit.ignore(model.__class__.forward)
         logging.info("Using torch.jit.script")
         model = torch.jit.script(model)
         filename = params.exp_dir / "cpu_jit.pt"