Mirror of https://github.com/k2-fsa/icefall.git
make biases learn faster in a different way.
commit dcab1aee4e
parent 992f922f81
@@ -179,7 +179,7 @@ class ScaledLinear(nn.Linear):
         with torch.no_grad():
             self.weight[:] *= initial_scale
             if self.bias is not None:
-                self.bias[:] *= initial_scale
+                self.bias[:] *= initial_scale * 4.0

     def get_weight(self):  # not needed any more but kept for back compatibility
         return self.weight
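The only functional change in this hunk is the extra factor of 4.0 applied to the bias at initialization. A minimal sketch of the pattern, assuming (as the hunk suggests) that ScaledLinear subclasses nn.Linear and takes an initial_scale argument; the real icefall class carries more machinery:

    import torch
    import torch.nn as nn

    class ScaledLinear(nn.Linear):
        """Sketch: an nn.Linear whose parameters are rescaled once at init."""

        def __init__(self, *args, initial_scale: float = 1.0, **kwargs):
            super().__init__(*args, **kwargs)
            with torch.no_grad():
                self.weight[:] *= initial_scale
                if self.bias is not None:
                    # after this commit: biases start 4x larger than the weight scale
                    self.bias[:] *= initial_scale * 4.0

        def get_weight(self):  # not needed any more but kept for back compatibility
            return self.weight

    layer = ScaledLinear(256, 256, initial_scale=0.25)

Presumably, under an update rule whose effective step size grows with a parameter's magnitude, starting the biases 4x larger makes them learn faster without needing a separate learning rate; the next hunk removes exactly that per-group learning rate.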
@@ -871,11 +871,7 @@ def run(rank, world_size, args):
         logging.info("Using DDP")
         model = DDP(model, device_ids=[rank])

-    params_to_pass = [ {'params': [ p for (name,p) in model.named_parameters() if 'bias' not in name] },
-                       {'params': [ p for (name,p) in model.named_parameters() if 'bias' in name ], 'lr': params.initial_lr*2.0 } ]
-
-    optimizer = Cain(params_to_pass, lr=params.initial_lr)
+    optimizer = Cain(model.parameters(), lr=params.initial_lr)

     scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs)
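The removed lines used standard PyTorch parameter groups to give the biases twice the base learning rate; the commit replaces that with the 4x initial scale above. A sketch of the param-group mechanism that was dropped, with torch.optim.Adam standing in for icefall's Cain optimizer and a made-up initial_lr (both are assumptions, not the project's actual values):

    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(10, 10)  # stand-in model, not icefall's
    initial_lr = 0.003         # hypothetical value for params.initial_lr

    param_groups = [
        {"params": [p for (name, p) in model.named_parameters()
                    if "bias" not in name]},
        {"params": [p for (name, p) in model.named_parameters()
                    if "bias" in name],
         "lr": initial_lr * 2.0},  # biases trained at double the base lr
    ]
    optimizer = optim.Adam(param_groups, lr=initial_lr)

    # groups without an explicit 'lr' inherit the default passed above
    print([g["lr"] for g in optimizer.param_groups])  # [0.003, 0.006]

A scheduler that rescales each group's lr multiplicatively would preserve the 2:1 ratio throughout training, so the two approaches differ mainly in where the "biases learn faster" knob lives: optimizer configuration versus parameter initialization.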