From 920ed685ac40a27156bc582c334b7c14ba88d86d Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Mon, 26 Dec 2022 14:27:16 +0800
Subject: [PATCH] Change how bypass_scale works, src = src * bypass_scale +
 src_orig * (1.0 - bypass_scale)

---
 .../ASR/pruned_transducer_stateless7/zipformer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index dd66d23b3..c08d66b0b 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -474,7 +474,7 @@ class ZipformerEncoderLayer(nn.Module):
     def get_bypass_scale(self, batch_size: int):
         # returns bypass-scale of shape (num_channels,),
         # or (batch_size, num_channels,). This is actually the
-        # scale on the delta src - src_orig, so 0 correponds to bypassing
+        # scale on the non-residual term, so 0 corresponds to bypassing
         # this module.
         if torch.jit.is_scripting() or not self.training:
             return self.bypass_scale
@@ -575,9 +575,9 @@ class ZipformerEncoderLayer(nn.Module):
         src = self.balancer(src)
         src = self.norm_final(src)
 
-        delta = src - src_orig
+        bypass_scale = self.get_bypass_scale(src.shape[1])
+        src = src * bypass_scale + src_orig * (1.0 - bypass_scale)
 
-        src = src_orig + delta * self.get_bypass_scale(src.shape[1])
         src = self.whiten(src)
 
         return src, attn_weights
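
The two formulations are algebraically identical, since src_orig + (src - src_orig) * s = src * s + src_orig * (1 - s); the new form expresses the bypass directly as a weighted combination of src and src_orig instead of scaling an intermediate delta tensor. Below is a minimal standalone sketch (not part of the commit) that checks the numerical equivalence; the tensor shapes are assumptions, chosen so that src.shape[1] is the batch size, as the call get_bypass_scale(src.shape[1]) implies:

    import torch

    torch.manual_seed(0)

    # assumed layout: (seq_len, batch_size, num_channels), so that
    # src.shape[1] == batch_size as passed to get_bypass_scale()
    seq_len, batch_size, num_channels = 10, 4, 256
    src_orig = torch.randn(seq_len, batch_size, num_channels)
    src = torch.randn(seq_len, batch_size, num_channels)

    # per-channel scale of shape (num_channels,), broadcasting over the
    # leading dims; values in [0, 1]
    bypass_scale = torch.rand(num_channels)

    # old formulation: scale applied to the delta (src - src_orig)
    old = src_orig + (src - src_orig) * bypass_scale

    # new formulation: weighted combination of src and src_orig
    new = src * bypass_scale + src_orig * (1.0 - bypass_scale)

    assert torch.allclose(old, new, atol=1e-5)

With either form, bypass_scale == 0 reduces the layer to the identity (the output equals src_orig, i.e. the module is bypassed), and bypass_scale == 1 keeps the module output unchanged.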