Enable the "grad_scale is too small" error

This commit is contained in:
jinzr 2024-03-11 23:14:14 +08:00
parent 89632bf748
commit e69b60e579

View File

@@ -515,9 +515,9 @@ def train_one_epoch(
if cur_grad_scale < 1.0e-05:
save_bad_model()
logging.warning(f"Grad scale is small: {cur_grad_scale}")
# raise RuntimeError(
# f"grad_scale is too small, exiting: {cur_grad_scale}"
# )
raise RuntimeError(
f"grad_scale is too small, exiting: {cur_grad_scale}"
)
if params.batch_idx_train % params.log_interval == 0:
cur_lr_g = max(scheduler_g.get_last_lr())