mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-11 06:55:27 +00:00
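Changes to schedules: _whitening_schedule made longer (second breakpoint moved from 12000 to 20000); min_abs schedule on AttentionSqueeze.out_balancer and NonlinAttentionModule.balancer2 made shorter (second breakpoint moved from 8000 to 4000); added a dip in conv_skip_rate via a new intermediate point (4000, 0.05).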
This commit is contained in:
parent
864ff96322
commit
3213c18a22
@ -355,7 +355,7 @@ class Zipformer(EncoderInterface):
|
|||||||
|
|
||||||
def _whitening_schedule(x: float, ratio: float = 2.0) -> ScheduledFloat:
|
def _whitening_schedule(x: float, ratio: float = 2.0) -> ScheduledFloat:
|
||||||
return ScheduledFloat((0.0, x),
|
return ScheduledFloat((0.0, x),
|
||||||
(12000.0, ratio * x),
|
(20000.0, ratio * x),
|
||||||
default=x)
|
default=x)
|
||||||
|
|
||||||
def _aux_grad_scale() -> float:
|
def _aux_grad_scale() -> float:
|
||||||
@ -399,7 +399,7 @@ class ZipformerEncoderLayer(nn.Module):
|
|||||||
# to work correctly.
|
# to work correctly.
|
||||||
layer_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), (4000.0, 0.05), default=0),
|
layer_skip_rate: FloatLike = ScheduledFloat((0.0, 0.5), (4000.0, 0.05), default=0),
|
||||||
attention_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (4000.0, 0.0), default=0),
|
attention_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (4000.0, 0.0), default=0),
|
||||||
conv_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (16000, 0.0), default=0),
|
conv_skip_rate: FloatLike = ScheduledFloat((0.0, 0.2), (4000.0, 0.05), (16000, 0.0), default=0),
|
||||||
const_attention_rate: FloatLike = ScheduledFloat((0.0, 0.25), (4000.0, 0.025), default=0),
|
const_attention_rate: FloatLike = ScheduledFloat((0.0, 0.25), (4000.0, 0.025), default=0),
|
||||||
bypass_min: FloatLike = ScheduledFloat((0.0, 0.75), (20000.0, 0.2), default=0),
|
bypass_min: FloatLike = ScheduledFloat((0.0, 0.75), (20000.0, 0.2), default=0),
|
||||||
bypass_max: FloatLike = 1.0,
|
bypass_max: FloatLike = 1.0,
|
||||||
@ -1408,7 +1408,7 @@ class AttentionSqueeze(nn.Module):
|
|||||||
self.out_balancer = ActivationBalancer(
|
self.out_balancer = ActivationBalancer(
|
||||||
embed_dim, channel_dim=-1,
|
embed_dim, channel_dim=-1,
|
||||||
min_positive=0.3, max_positive=0.7,
|
min_positive=0.3, max_positive=0.7,
|
||||||
min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.005)),
|
min_abs=ScheduledFloat((0.0, 0.001), (4000.0, 0.005)),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -1543,7 +1543,7 @@ class NonlinAttentionModule(nn.Module):
|
|||||||
self.balancer2 = ActivationBalancer(
|
self.balancer2 = ActivationBalancer(
|
||||||
channels, channel_dim=-1,
|
channels, channel_dim=-1,
|
||||||
min_positive=0.3, max_positive=0.7,
|
min_positive=0.3, max_positive=0.7,
|
||||||
min_abs=ScheduledFloat((0.0, 0.001), (8000.0, 0.005)),
|
min_abs=ScheduledFloat((0.0, 0.001), (4000.0, 0.005)),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user