Multiply length_factor by 2.0.

Daniel Povey 2022-11-19 13:36:16 +08:00
parent e9806950f5
commit d23fda7c5f


@@ -937,7 +937,10 @@ class CompactRelPositionalEncoding(torch.nn.Module):
 # length_factor is chosen so that the FFT can exactly separate points
 # close to the origin (T == 0). So this part of the formulation is not really
 # heuristic.
-length_factor = self.embed_dim / (2.0 * math.pi)  # todo: test this.
+length_factor = self.embed_dim / (2.0 * math.pi)
+# multiplying length_factor by this heuristic constant should reduce the resolution near to the
+# origin, i.e. reduce its ability to separate points near zero.
+length_factor *= 2.0
 # note for machine implementations: if atan is not available, we can use:
 #   x.sign() * ((1 / (x.abs() + 1)) - 1) * (-math.pi/2)
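
For context, a minimal standalone sketch of the knob being tuned. The hunk does not show how length_factor is consumed downstream, so the atan-based mapping, the tensor names, and the embed_dim value below are assumptions for illustration, not the actual CompactRelPositionalEncoding code.

import math
import torch

# Hypothetical illustration (assumed usage, not the module's real code):
# map relative offsets x to bounded angles with atan(x / length_factor).
# The slope of atan(x / length_factor) at x == 0 is 1 / length_factor, so
# doubling length_factor halves the resolution with which offsets near
# zero are separated, which is the effect described in the added comment.
embed_dim = 512                                  # assumed embedding size
length_factor = embed_dim / (2.0 * math.pi)      # value before this commit
length_factor *= 2.0                             # the change in this commit

T = 16
x = torch.arange(-(T - 1), T, dtype=torch.float32)  # relative offsets
angles = (x / length_factor).atan()                 # bounded in (-pi/2, pi/2)
print(angles[T - 2 : T + 1])                        # offsets -1, 0, +1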
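
The "if atan is not available" note gives a rational fallback for atan; the quick check below (an illustration added here, not part of the commit) compares it with torch.atan. The fallback is odd, exact at x == 0 and x == +/-1, and saturates at +/-pi/2 as |x| grows.

import math
import torch

def atan_fallback(x: torch.Tensor) -> torch.Tensor:
    # The surrogate from the code comment: sign(x) * ((1 / (|x| + 1)) - 1) * (-pi/2).
    return x.sign() * ((1.0 / (x.abs() + 1.0)) - 1.0) * (-math.pi / 2)

x = torch.tensor([-10.0, -1.0, -0.5, 0.0, 0.5, 1.0, 10.0])
print(torch.stack([x.atan(), atan_fallback(x)]))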