From ccc38a97f702ce32760ebe34f070cb0600511e5b Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Thu, 5 Jan 2023 18:50:04 +0800
Subject: [PATCH] Reduce lr_scales of some sub modules

---
 .../ASR/pruned_transducer_stateless7/zipformer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
index e0f073d13..75cf6dac7 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/zipformer.py
@@ -1086,6 +1086,7 @@ class RelPositionMultiheadAttentionWeights(nn.Module):
                                                    (4000.0, 0.0))
     ) -> None:
         super().__init__()
+        self.lr_scale = 0.75
         self.embed_dim = embed_dim
         self.num_heads = num_heads
         self.query_head_dim = query_head_dim
@@ -1336,6 +1337,9 @@ class AttentionSqueeze(nn.Module):
                  hidden_dim: int,
                  bottleneck_dim: int = 16):
         super().__init__()
+
+        self.lr_scale = 0.5
+
         self.bottleneck_dim = bottleneck_dim
         self.in_proj = nn.Linear(embed_dim, hidden_dim,
@@ -1476,6 +1480,8 @@ class NonlinAttention(nn.Module):
     ) -> None:
         super().__init__()
 
+        self.lr_scale = 0.75
+
         self.hidden_channels = hidden_channels
 
         self.in_proj = nn.Linear(channels, hidden_channels * 2, bias=True)
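
Note: the patch above only sets lr_scale attributes on the three modules; the code that actually reads those attributes and lowers the learning rate of the affected parameters lives elsewhere in the recipe and is not part of this diff. The sketch below is a minimal, hypothetical illustration of how such per-module attributes could be folded into optimizer parameter groups. The helper name scaled_param_groups and the "multiply scales down the module tree" semantics are assumptions made for illustration, not the actual icefall/ScaledAdam implementation.

import torch
import torch.nn as nn


def scaled_param_groups(model: nn.Module, base_lr: float):
    # Hypothetical helper: build optimizer parameter groups whose learning
    # rates are scaled by the product of every lr_scale attribute found on
    # a module and its ancestors.
    scales = {}

    def visit(module: nn.Module, scale: float):
        scale = scale * getattr(module, "lr_scale", 1.0)
        # Parameters owned directly by this module (not by its children).
        for p in module.parameters(recurse=False):
            scales[id(p)] = scale
        for child in module.children():
            visit(child, scale)

    visit(model, 1.0)

    # Group parameters that share the same accumulated scale.
    groups = {}
    for p in model.parameters():
        groups.setdefault(scales[id(p)], []).append(p)
    return [{"params": ps, "lr": base_lr * s} for s, ps in groups.items()]


if __name__ == "__main__":
    attn = nn.Linear(4, 4)
    attn.lr_scale = 0.75  # mimics RelPositionMultiheadAttentionWeights above
    model = nn.Sequential(attn, nn.Linear(4, 4))
    optimizer = torch.optim.Adam(scaled_param_groups(model, base_lr=1e-3))
    for g in optimizer.param_groups:
        print(g["lr"], len(g["params"]))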