mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-07 08:04:18 +00:00
keep model_avg on cpu
This commit is contained in:
parent
70634d58a1
commit
c3bb03253f
@ -867,10 +867,6 @@ def run(rank, world_size, args):
|
|||||||
model = DDP(model, device_ids=[rank])
|
model = DDP(model, device_ids=[rank])
|
||||||
model.device = device
|
model.device = device
|
||||||
|
|
||||||
if rank == 0:
|
|
||||||
model_avg.to(device)
|
|
||||||
model_avg.device = device
|
|
||||||
|
|
||||||
optimizer = Eve(model.parameters(), lr=params.initial_lr)
|
optimizer = Eve(model.parameters(), lr=params.initial_lr)
|
||||||
|
|
||||||
scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs)
|
scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs)
|
||||||
|
@ -467,5 +467,7 @@ def average_state_dict(
|
|||||||
uniqued_names = list(uniqued.values())
|
uniqued_names = list(uniqued.values())
|
||||||
for k in uniqued_names:
|
for k in uniqued_names:
|
||||||
state_dict_1[k] *= weight_1
|
state_dict_1[k] *= weight_1
|
||||||
state_dict_1[k] += state_dict_2[k] * weight_2
|
state_dict_1[k] += (
|
||||||
|
state_dict_2[k].to(device=state_dict_1[k].device) * weight_2
|
||||||
|
)
|
||||||
state_dict_1[k] *= scaling_factor
|
state_dict_1[k] *= scaling_factor
|
||||||
|
Loading…
x
Reference in New Issue
Block a user