From 76031a7c1d2ef2db4c7634c7f6e1ad73de54c9bb Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sun, 18 Sep 2022 13:58:19 +0800
Subject: [PATCH] Loosen some limits of activation balancers

---
 .../ASR/pruned_transducer_stateless7/conformer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
index 63adfa792..57302b0cd 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/conformer.py
@@ -173,7 +173,7 @@ class ConformerEncoderLayer(nn.Module):
 
         self.feed_forward = nn.Sequential(
             nn.Linear(d_model, dim_feedforward),
-            ActivationBalancer(channel_dim=-1, max_abs=3.0),
+            ActivationBalancer(channel_dim=-1, max_abs=10.0),
             DoubleSwish(),
             nn.Dropout(dropout),
             ScaledLinear(dim_feedforward, d_model,
@@ -182,7 +182,7 @@ class ConformerEncoderLayer(nn.Module):
 
         self.feed_forward_macaron = nn.Sequential(
             nn.Linear(d_model, dim_feedforward),
-            ActivationBalancer(channel_dim=-1, max_abs=3.0),
+            ActivationBalancer(channel_dim=-1, max_abs=10.0),
             DoubleSwish(),
             nn.Dropout(dropout),
             ScaledLinear(dim_feedforward, d_model,
@@ -465,7 +465,8 @@ class RelPositionMultiheadAttention(nn.Module):
 
         self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
         self.in_balancer = ActivationBalancer(channel_dim=-1, max_abs=5.0)
-        self.proj_balancer = ActivationBalancer(channel_dim=-1, max_abs=10.0)
+        self.proj_balancer = ActivationBalancer(channel_dim=-1, max_abs=10.0,
+                                                min_positive=0.0, max_positive=1.0)
         self.out_proj = ScaledLinear(
             embed_dim, embed_dim, bias=True, initial_scale=0.5
         )
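
For context on what these limits do: an ActivationBalancer (defined in scaling.py
in the same recipe) is an identity in the forward pass; during training it nudges
the backward-pass gradients of channels whose statistics fall outside the
configured limits. The code below is a minimal, simplified sketch of that idea,
not the recipe's actual implementation: the penalty form and the grad_scale value
are assumptions for illustration, while channel_dim, min_positive, max_positive,
and max_abs mirror the arguments used in the patch above.

import torch
import torch.nn as nn


class BalancerFunction(torch.autograd.Function):
    """Identity in forward; adds a small corrective term to the gradient
    for channels that violate the configured limits. A simplified sketch,
    not icefall's actual ActivationBalancer."""

    @staticmethod
    def forward(ctx, x, channel_dim, min_positive, max_positive, max_abs, grad_scale):
        ctx.save_for_backward(x)
        ctx.config = (channel_dim, min_positive, max_positive, max_abs, grad_scale)
        return x

    @staticmethod
    def backward(ctx, grad_output):
        (x,) = ctx.saved_tensors
        channel_dim, min_positive, max_positive, max_abs, grad_scale = ctx.config
        # Reduce over every dim except the channel dim.
        dims = [d for d in range(x.ndim) if d != channel_dim % x.ndim]
        proportion_positive = (x > 0).float().mean(dim=dims, keepdim=True)
        mean_abs = x.abs().mean(dim=dims, keepdim=True)
        # +1 where a channel is positive too rarely, -1 where too often;
        # subtracting this below pushes the activations back into range.
        sign_factor = ((proportion_positive < min_positive).float()
                       - (proportion_positive > max_positive).float())
        # Push activations toward zero in channels whose mean |x| exceeds max_abs.
        abs_factor = (mean_abs > max_abs).float() * torch.sign(x)
        extra_grad = grad_scale * grad_output.abs() * (abs_factor - sign_factor)
        return grad_output + extra_grad, None, None, None, None, None


class SimpleActivationBalancer(nn.Module):
    def __init__(self, channel_dim=-1, min_positive=0.05, max_positive=0.95,
                 max_abs=10.0, grad_scale=0.01):
        super().__init__()
        self.channel_dim = channel_dim
        self.min_positive = min_positive
        self.max_positive = max_positive
        self.max_abs = max_abs
        self.grad_scale = grad_scale  # assumed illustrative penalty scale

    def forward(self, x):
        if not self.training:
            return x  # identity at inference time
        return BalancerFunction.apply(x, self.channel_dim, self.min_positive,
                                      self.max_positive, self.max_abs,
                                      self.grad_scale)

Read against this sketch, the patch raises the magnitude cap applied just before
DoubleSwish in both feed-forward modules from 3.0 to 10.0, and the added
min_positive=0.0, max_positive=1.0 arguments make the sign constraint on
proj_balancer vacuous, so only its max_abs=10.0 magnitude limit remains active.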