diff --git a/egs/librispeech/ASR/conformer_ctc/subsampling.py b/egs/librispeech/ASR/conformer_ctc/subsampling.py index aa842a31f..ba0f08271 100644 --- a/egs/librispeech/ASR/conformer_ctc/subsampling.py +++ b/egs/librispeech/ASR/conformer_ctc/subsampling.py @@ -47,15 +47,15 @@ class Conv2dSubsampling(nn.Module): nn.Conv2d( in_channels=1, out_channels=odim, kernel_size=3, stride=2 ), - DerivBalancer(channel_dim=1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=1, threshold=0.05, + max_factor=0.025), nn.ReLU(), ExpScale(odim, 1, 1, speed=20.0), nn.Conv2d( in_channels=odim, out_channels=odim, kernel_size=3, stride=2 ), - DerivBalancer(channel_dim=1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=1, threshold=0.05, + max_factor=0.025), nn.ReLU(), ExpScale(odim, 1, 1, speed=20.0), ) diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py index 056958ff6..42d159ff5 100644 --- a/egs/librispeech/ASR/transducer_stateless/conformer.py +++ b/egs/librispeech/ASR/transducer_stateless/conformer.py @@ -156,8 +156,8 @@ class ConformerEncoderLayer(nn.Module): self.feed_forward = nn.Sequential( nn.Linear(d_model, dim_feedforward), - DerivBalancer(channel_dim=-1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=-1, threshold=0.05, + max_factor=0.025), ExpScaleSwish(dim_feedforward, speed=20.0), nn.Dropout(dropout), nn.Linear(dim_feedforward, d_model), @@ -165,8 +165,8 @@ class ConformerEncoderLayer(nn.Module): self.feed_forward_macaron = nn.Sequential( nn.Linear(d_model, dim_feedforward), - DerivBalancer(channel_dim=-1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=-1, threshold=0.05, + max_factor=0.025), ExpScaleSwish(dim_feedforward, speed=20.0), nn.Dropout(dropout), nn.Linear(dim_feedforward, d_model),