diff --git a/egs/librispeech/ASR/conformer_ctc/subsampling.py b/egs/librispeech/ASR/conformer_ctc/subsampling.py index 97b9ae97b..e38a94d09 100644 --- a/egs/librispeech/ASR/conformer_ctc/subsampling.py +++ b/egs/librispeech/ASR/conformer_ctc/subsampling.py @@ -48,12 +48,12 @@ class Conv2dSubsampling(nn.Module): in_channels=1, out_channels=odim, kernel_size=3, stride=2 ), nn.ReLU(), - ExpScale(odim, 1, 1, speed=50.0), + ExpScale(odim, 1, 1, speed=20.0), nn.Conv2d( in_channels=odim, out_channels=odim, kernel_size=3, stride=2 ), nn.ReLU(), - ExpScale(odim, 1, 1, speed=50.0), + ExpScale(odim, 1, 1, speed=20.0), ) self.out = nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim) self.out_norm = nn.LayerNorm(odim, elementwise_affine=False) diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py index dc6b54399..368165008 100644 --- a/egs/librispeech/ASR/transducer_stateless/conformer.py +++ b/egs/librispeech/ASR/transducer_stateless/conformer.py @@ -156,14 +156,14 @@ class ConformerEncoderLayer(nn.Module): self.feed_forward = nn.Sequential( nn.Linear(d_model, dim_feedforward), - ExpScaleSwish(dim_feedforward, speed=50.0), + ExpScaleSwish(dim_feedforward, speed=20.0), nn.Dropout(dropout), nn.Linear(dim_feedforward, d_model), ) self.feed_forward_macaron = nn.Sequential( nn.Linear(d_model, dim_feedforward), - ExpScaleSwish(dim_feedforward, speed=50.0), + ExpScaleSwish(dim_feedforward, speed=20.0), nn.Dropout(dropout), nn.Linear(dim_feedforward, d_model), ) @@ -874,7 +874,7 @@ class ConvolutionModule(nn.Module): bias=bias, ) # shape: (channels, 1), broadcasts with (batch, channel, time). - self.activation = ExpScaleSwish(channels, 1, speed=50.0) + self.activation = ExpScaleSwish(channels, 1, speed=20.0) self.pointwise_conv2 = nn.Conv1d( channels,