Mirror of https://github.com/k2-fsa/icefall.git, synced 2025-12-11 06:55:27 +00:00
Reduce lr_scales of some sub-modules
commit ccc38a97f7
parent 90c02b471c
@@ -1086,6 +1086,7 @@ class RelPositionMultiheadAttentionWeights(nn.Module):
                                              (4000.0, 0.0))
     ) -> None:
         super().__init__()
+        self.lr_scale = 0.75
         self.embed_dim = embed_dim
         self.num_heads = num_heads
         self.query_head_dim = query_head_dim
@@ -1336,6 +1337,9 @@ class AttentionSqueeze(nn.Module):
                  hidden_dim: int,
                  bottleneck_dim: int = 16):
         super().__init__()
+
+        self.lr_scale = 0.5
+
         self.bottleneck_dim = bottleneck_dim
         self.in_proj = nn.Linear(embed_dim, hidden_dim,
@@ -1476,6 +1480,8 @@ class NonlinAttention(nn.Module):
     ) -> None:
         super().__init__()
+
+        self.lr_scale = 0.75

         self.hidden_channels = hidden_channels
         self.in_proj = nn.Linear(channels, hidden_channels * 2, bias=True)
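The commit only attaches `lr_scale` attributes (0.75, 0.5, 0.75) to the three sub-modules; how the training code consumes them is outside this diff. Below is a minimal sketch, not icefall's actual optimizer code, of one way such attributes could be turned into per-parameter-group learning rates. The helper `get_parameter_groups` is hypothetical, and the assumption that scales of nested modules multiply along the module path is an illustration, not confirmed by this commit.

import torch
import torch.nn as nn

def get_parameter_groups(model: nn.Module, base_lr: float):
    """Hypothetical helper (for illustration only): build optimizer
    param groups whose lr is base_lr times the product of the
    `lr_scale` attributes found along the module path."""
    groups = []

    def visit(module: nn.Module, scale: float):
        # Fold in this module's lr_scale if it defines one,
        # e.g. the 0.75 / 0.5 values added by this commit.
        scale = scale * getattr(module, "lr_scale", 1.0)
        # Collect only this module's own parameters; children are
        # visited recursively so their scales can compound.
        params = list(module.parameters(recurse=False))
        if params:
            groups.append({"params": params, "lr": base_lr * scale})
        for child in module.children():
            visit(child, scale)

    visit(model, 1.0)
    return groups

# Usage: the groups can be handed to any torch optimizer, e.g.
#   optimizer = torch.optim.Adam(get_parameter_groups(model, base_lr=1e-3))
# Parameters of RelPositionMultiheadAttentionWeights would then train
# at 0.75x the base learning rate, AttentionSqueeze at 0.5x, and so on.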