Loosen some limits of activation balancers
parent 3122637266
commit 76031a7c1d
@@ -173,7 +173,7 @@ class ConformerEncoderLayer(nn.Module):
         self.feed_forward = nn.Sequential(
             nn.Linear(d_model, dim_feedforward),
-            ActivationBalancer(channel_dim=-1, max_abs=3.0),
+            ActivationBalancer(channel_dim=-1, max_abs=10.0),
             DoubleSwish(),
             nn.Dropout(dropout),
             ScaledLinear(dim_feedforward, d_model,
@@ -182,7 +182,7 @@ class ConformerEncoderLayer(nn.Module):
         self.feed_forward_macaron = nn.Sequential(
             nn.Linear(d_model, dim_feedforward),
-            ActivationBalancer(channel_dim=-1, max_abs=3.0),
+            ActivationBalancer(channel_dim=-1, max_abs=10.0),
             DoubleSwish(),
             nn.Dropout(dropout),
             ScaledLinear(dim_feedforward, d_model,
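For reference, a minimal sketch of a feed-forward branch after this change (the same max_abs relaxation, 3.0 to 10.0, applies to both feed_forward and feed_forward_macaron). It assumes ActivationBalancer, DoubleSwish, and ScaledLinear are importable from icefall's scaling.py, as in this file; the layer sizes and dropout rate are illustrative, not taken from the recipe:

import torch
import torch.nn as nn

# Assumed import: in icefall these helpers live in scaling.py
# alongside conformer.py.
from scaling import ActivationBalancer, DoubleSwish, ScaledLinear

d_model, dim_feedforward, dropout = 512, 2048, 0.1  # illustrative sizes

feed_forward = nn.Sequential(
    nn.Linear(d_model, dim_feedforward),
    # max_abs raised from 3.0 to 10.0: the balancer now only pushes
    # back on activations whose magnitude exceeds 10.
    ActivationBalancer(channel_dim=-1, max_abs=10.0),
    DoubleSwish(),
    nn.Dropout(dropout),
    ScaledLinear(dim_feedforward, d_model),
)

x = torch.randn(10, 4, d_model)  # (seq_len, batch, d_model)
print(feed_forward(x).shape)     # torch.Size([10, 4, 512])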
@@ -465,7 +465,8 @@ class RelPositionMultiheadAttention(nn.Module):
         self.in_proj = nn.Linear(embed_dim, 3 * embed_dim, bias=True)
         self.in_balancer = ActivationBalancer(channel_dim=-1, max_abs=5.0)
-        self.proj_balancer = ActivationBalancer(channel_dim=-1, max_abs=10.0)
+        self.proj_balancer = ActivationBalancer(channel_dim=-1, max_abs=10.0,
+                                                min_positive=0.0, max_positive=1.0)
         self.out_proj = ScaledLinear(
             embed_dim, embed_dim, bias=True, initial_scale=0.5
         )
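The attention-projection balancer additionally passes min_positive=0.0 and max_positive=1.0. Assuming the ActivationBalancer signature from icefall's scaling.py, where these arguments bound the per-channel fraction of positive values (with defaults around 0.05 and 0.95), the new bounds make the sign constraint vacuous, so only the max_abs magnitude limit remains active here. A hedged sketch of the resulting behaviour:

import torch

from scaling import ActivationBalancer  # assumed import, as in conformer.py

proj_balancer = ActivationBalancer(
    channel_dim=-1,
    max_abs=10.0,
    # [0.0, 1.0] admits any fraction of positive values per channel,
    # effectively disabling the sign-balancing constraint.
    min_positive=0.0,
    max_positive=1.0,
)

x = torch.randn(10, 4, 512, requires_grad=True)
y = proj_balancer(x)  # in this implementation the forward pass is the identity
y.sum().backward()    # the balancing acts only on gradients in backward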