From 9a71406a46309f7bc96343d265a931fdeda3f184 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 2 Dec 2022 15:40:21 +0800
Subject: [PATCH] Reduce offset from 0.075 to 0.025.

---
 egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
index ded057867..628714019 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/scaling.py
@@ -1118,7 +1118,8 @@ class DoubleSwishFunction(torch.autograd.Function):
         assert d_scaled.max() < 256.0
         d_int = d_scaled.to(torch.uint8)
         ctx.save_for_backward(d_int)
-        y = y + alpha * x + beta * x.clamp(min=-x_limit, max=x_limit) + 0.075
+        # on wolframalpha, do: (x * sigmoid(x-1) - 0.05 * x + 0.05 * min(0.15, max(-0.15, x)) + 0.025) from x=-3 to 2
+        y = y + alpha * x + beta * x.clamp(min=-x_limit, max=x_limit) + 0.025
         if x.dtype == torch.float16 or torch.is_autocast_enabled():
             y = y.to(torch.float16)
         return y
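
Note for readers (not part of the patch): the expression in the new comment can be checked
numerically with the minimal sketch below. It only evaluates the formula written in that
comment; the constants alpha = -0.05, beta = 0.05 and x_limit = 0.15 are read off the
comment's expression and are assumptions here, not necessarily the values those variables
take elsewhere in scaling.py.

    import torch

    def shifted_double_swish(x: torch.Tensor) -> torch.Tensor:
        # Expression from the patch comment:
        #   x * sigmoid(x - 1) - 0.05 * x + 0.05 * clamp(x, -0.15, 0.15) + 0.025
        # alpha, beta, x_limit are inferred from that expression (assumed values).
        alpha, beta, x_limit, offset = -0.05, 0.05, 0.15, 0.025
        y = x * torch.sigmoid(x - 1.0)
        return y + alpha * x + beta * x.clamp(min=-x_limit, max=x_limit) + offset

    # Evaluate over the same range the comment suggests plotting (x from -3 to 2).
    x = torch.linspace(-3.0, 2.0, steps=11)
    print(torch.stack([x, shifted_double_swish(x)], dim=1))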