From 09c4946c15bbf2c2eb7deeb6ce1d7a7e36476eaa Mon Sep 17 00:00:00 2001
From: danqing fu
Date: Tue, 6 Jun 2023 11:09:00 +0800
Subject: [PATCH] modify max_len to allow longer input audio

---
 egs/librispeech/ASR/zipformer/zipformer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/egs/librispeech/ASR/zipformer/zipformer.py b/egs/librispeech/ASR/zipformer/zipformer.py
index 15022947f..85ebdb56e 100644
--- a/egs/librispeech/ASR/zipformer/zipformer.py
+++ b/egs/librispeech/ASR/zipformer/zipformer.py
@@ -1305,6 +1305,12 @@ class CompactRelPositionalEncoding(torch.nn.Module):
     ) -> None:
         """Construct a CompactRelPositionalEncoding object."""
         super(CompactRelPositionalEncoding, self).__init__()
+        if torch.jit.is_tracing():
+            # 10k frames correspond to ~100k ms, i.e., ~100 seconds.
+            # When tracing, this assumes that the longest input will not
+            # have more than 10k frames.
+            # NOTE: is_tracing must be called; the bare function is always truthy.
+            max_len = 10000
         self.embed_dim = embed_dim
         assert embed_dim % 2 == 0
         self.dropout = Dropout2(dropout_rate)