only match the exact module prefix

This commit is contained in:
marcoyang 2023-10-18 17:23:05 +08:00
parent 23375cb64f
commit 75ab9d7d02
2 changed files with 21 additions and 10 deletions

View File

@ -655,8 +655,12 @@ def load_model_params(
dst_state_dict = model.state_dict() dst_state_dict = model.state_dict()
for module in init_modules: for module in init_modules:
logging.info(f"Loading parameters starting with prefix {module}") logging.info(f"Loading parameters starting with prefix {module}")
src_keys = [k for k in src_state_dict.keys() if k.startswith(module)] src_keys = [
dst_keys = [k for k in dst_state_dict.keys() if k.startswith(module)] k for k in src_state_dict.keys() if k.startswith(module.strip() + ".")
]
dst_keys = [
k for k in dst_state_dict.keys() if k.startswith(module.strip() + ".")
]
assert set(src_keys) == set(dst_keys) # two sets should match exactly assert set(src_keys) == set(dst_keys) # two sets should match exactly
for key in src_keys: for key in src_keys:
dst_state_dict[key] = src_state_dict.pop(key) dst_state_dict[key] = src_state_dict.pop(key)
@ -1077,23 +1081,26 @@ def run(rank, world_size, args):
num_param = sum([p.numel() for p in model.parameters()]) num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}") logging.info(f"Number of model parameters: {num_param}")
assert params.save_every_n >= params.average_period
model_avg: Optional[nn.Module] = None
if rank == 0:
# model_avg is only used with rank 0
model_avg = copy.deepcopy(model).to(torch.float64)
# load model parameters for model fine-tuning # load model parameters for model fine-tuning
if params.do_finetune: if params.do_finetune:
modules = params.init_modules.split(",") if params.init_modules else None modules = params.init_modules.split(",") if params.init_modules else None
checkpoints = load_model_params( checkpoints = load_model_params(
ckpt=params.finetune_ckpt, model=model, init_modules=modules ckpt=params.finetune_ckpt, model=model, init_modules=modules
) )
if rank == 0:
# model_avg is only used with rank 0
model_avg = copy.deepcopy(model).to(torch.float64)
else: else:
assert params.start_epoch > 0, params.start_epoch assert params.start_epoch > 0, params.start_epoch
checkpoints = load_checkpoint_if_available( checkpoints = load_checkpoint_if_available(
params=params, model=model, model_avg=model_avg params=params, model=model, model_avg=model_avg
) )
assert params.save_every_n >= params.average_period
model_avg: Optional[nn.Module] = None
if rank == 0:
# model_avg is only used with rank 0
model_avg = copy.deepcopy(model).to(torch.float64)
model.to(device) model.to(device)
if world_size > 1: if world_size > 1:

View File

@ -498,8 +498,12 @@ def load_model_params(
dst_state_dict = model.state_dict() dst_state_dict = model.state_dict()
for module in init_modules: for module in init_modules:
logging.info(f"Loading parameters starting with prefix {module}") logging.info(f"Loading parameters starting with prefix {module}")
src_keys = [k for k in src_state_dict.keys() if k.startswith(module)] src_keys = [
dst_keys = [k for k in dst_state_dict.keys() if k.startswith(module)] k for k in src_state_dict.keys() if k.startswith(module.strip() + ".")
]
dst_keys = [
k for k in dst_state_dict.keys() if k.startswith(module.strip() + ".")
]
assert set(src_keys) == set(dst_keys) # two sets should match exactly assert set(src_keys) == set(dst_keys) # two sets should match exactly
for key in src_keys: for key in src_keys:
dst_state_dict[key] = src_state_dict.pop(key) dst_state_dict[key] = src_state_dict.pop(key)