From 65b09dd5f22f72923289fd68c1641ecd33fa0c52 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 5 Mar 2022 00:07:14 +0800 Subject: [PATCH] Raise the threshold in brelu from 0.02 to 0.05 (2.5x); slightly increase max_factor (0.02 to 0.025). --- egs/librispeech/ASR/conformer_ctc/subsampling.py | 8 ++++---- egs/librispeech/ASR/transducer_stateless/conformer.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/egs/librispeech/ASR/conformer_ctc/subsampling.py b/egs/librispeech/ASR/conformer_ctc/subsampling.py index aa842a31f..ba0f08271 100644 --- a/egs/librispeech/ASR/conformer_ctc/subsampling.py +++ b/egs/librispeech/ASR/conformer_ctc/subsampling.py @@ -47,15 +47,15 @@ class Conv2dSubsampling(nn.Module): nn.Conv2d( in_channels=1, out_channels=odim, kernel_size=3, stride=2 ), - DerivBalancer(channel_dim=1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=1, threshold=0.05, + max_factor=0.025), nn.ReLU(), ExpScale(odim, 1, 1, speed=20.0), nn.Conv2d( in_channels=odim, out_channels=odim, kernel_size=3, stride=2 ), - DerivBalancer(channel_dim=1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=1, threshold=0.05, + max_factor=0.025), nn.ReLU(), ExpScale(odim, 1, 1, speed=20.0), ) diff --git a/egs/librispeech/ASR/transducer_stateless/conformer.py b/egs/librispeech/ASR/transducer_stateless/conformer.py index 056958ff6..42d159ff5 100644 --- a/egs/librispeech/ASR/transducer_stateless/conformer.py +++ b/egs/librispeech/ASR/transducer_stateless/conformer.py @@ -156,8 +156,8 @@ class ConformerEncoderLayer(nn.Module): self.feed_forward = nn.Sequential( nn.Linear(d_model, dim_feedforward), - DerivBalancer(channel_dim=-1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=-1, threshold=0.05, + max_factor=0.025), ExpScaleSwish(dim_feedforward, speed=20.0), nn.Dropout(dropout), nn.Linear(dim_feedforward, d_model), @@ -165,8 +165,8 @@ class ConformerEncoderLayer(nn.Module): self.feed_forward_macaron = nn.Sequential( nn.Linear(d_model, dim_feedforward), - 
DerivBalancer(channel_dim=-1, threshold=0.02, - max_factor=0.02), + DerivBalancer(channel_dim=-1, threshold=0.05, + max_factor=0.025), ExpScaleSwish(dim_feedforward, speed=20.0), nn.Dropout(dropout), nn.Linear(dim_feedforward, d_model),