Reduce meta_lr_scale from 0.2 to 0.1, which reduces the loss @140 from 1.4 to 0.39.
This commit is contained in:
parent
b6199a71e9
commit
97feb8a3ec
@ -60,7 +60,7 @@ class LearnedGradient(Optimizer):
|
|||||||
params,
|
params,
|
||||||
lr=3e-02,
|
lr=3e-02,
|
||||||
size_lr_scale=0.1,
|
size_lr_scale=0.1,
|
||||||
meta_lr_scale=0.2,
|
meta_lr_scale=0.1,
|
||||||
betas=(0.9, 0.98),
|
betas=(0.9, 0.98),
|
||||||
eps=1.0e-08,
|
eps=1.0e-08,
|
||||||
size_update_period=1,
|
size_update_period=1,
|
||||||
@ -347,7 +347,7 @@ class LearnedGradient(Optimizer):
|
|||||||
# (group["lr_est_period"] ** 0.5) is intended to make it approximately invariant
|
# (group["lr_est_period"] ** 0.5) is intended to make it approximately invariant
|
||||||
# to the lr_est_period (close to convergence).
|
# to the lr_est_period (close to convergence).
|
||||||
meta_lr = group["lr"] * group["meta_lr_scale"] * (group["lr_est_period"] ** 0.5)
|
meta_lr = group["lr"] * group["meta_lr_scale"] * (group["lr_est_period"] ** 0.5)
|
||||||
beta1, beta2 = group["betas"]
|
beta1 = group["betas"][0]
|
||||||
eps = group["eps"]
|
eps = group["eps"]
|
||||||
delta = state["delta"]
|
delta = state["delta"]
|
||||||
ndim = p.ndim
|
ndim = p.ndim
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user