from local

This commit is contained in:
dohe0342 2022-12-14 16:54:26 +09:00
parent 669053c0d8
commit 5a4f4acd05
2 changed files with 1 addition and 0 deletions

View File

@@ -1081,6 +1081,7 @@ def train_one_epoch(
if cur_grad_scale < 0.01:
logging.warning(f"Grad scale is small: {cur_grad_scale}")
if cur_grad_scale < 1.0e-05:
wb.log({"valid/loss": 10000})
raise RuntimeError(
f"grad_scale is too small, exiting: {cur_grad_scale}"
)