diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index cfc15ab94..baa096334 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -228,7 +228,8 @@ class Zipformer(EncoderInterface): x = self.downsample_output(x) # class Downsample has this rounding behavior.. - lengths = (x_lens + 1) // 2 + assert self.output_downsampling_factor == 2 + lengths = (lengths + 1) // 2 x = x.permute(1, 0, 2) # (T, N, C) ->(N, T, C)