diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py index e0f073d13..75cf6dac7 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py @@ -1086,6 +1086,7 @@ class RelPositionMultiheadAttentionWeights(nn.Module): (4000.0, 0.0)) ) -> None: super().__init__() + self.lr_scale = 0.75 self.embed_dim = embed_dim self.num_heads = num_heads self.query_head_dim = query_head_dim @@ -1336,6 +1337,9 @@ class AttentionSqueeze(nn.Module): hidden_dim: int, bottleneck_dim: int = 16): super().__init__() + + self.lr_scale = 0.5 + self.bottleneck_dim = bottleneck_dim self.in_proj = nn.Linear(embed_dim, hidden_dim, @@ -1476,6 +1480,8 @@ class NonlinAttention(nn.Module): ) -> None: super().__init__() + self.lr_scale = 0.75 + self.hidden_channels = hidden_channels self.in_proj = nn.Linear(channels, hidden_channels * 2, bias=True)