From f4ff6188d9d38e11f34b95518a7626734717ee3d Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 11 Dec 2022 19:29:35 +0800 Subject: [PATCH] Set max_abs values on Conv2dSubsampling module. --- .../ASR/pruned_transducer_stateless7/zipformer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index 7e394238b..65c2ffb0d 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -1777,7 +1777,8 @@ class Conv2dSubsampling(nn.Module): ), ScaleGrad(0.1), ActivationBalancer(layer1_channels, - channel_dim=1), + channel_dim=1, + max_abs=0.5), DoubleSwish(), nn.Conv2d( in_channels=layer1_channels, @@ -1787,7 +1788,8 @@ class Conv2dSubsampling(nn.Module): padding=0, ), ActivationBalancer(layer2_channels, - channel_dim=1), + channel_dim=1, + max_abs=2.0), DoubleSwish(), nn.Conv2d( in_channels=layer2_channels, @@ -1796,7 +1798,8 @@ class Conv2dSubsampling(nn.Module): stride=(1, 2), # (time, freq) ), ActivationBalancer(layer3_channels, - channel_dim=1), + channel_dim=1, + max_abs=2.0), DoubleSwish(), ) out_height = (((in_channels - 1) // 2) - 1) // 2