Reduce top grad_scale limit from 128 to 32.

Daniel Povey 2022-12-08 18:36:29 +08:00
parent f4f3d057e7
commit 6e598cb18d


@@ -912,7 +912,7 @@ def train_one_epoch(
                 # behavior depending on the current grad scale.
                 cur_grad_scale = scaler._scale.item()
-                if cur_grad_scale < 8.0 or (cur_grad_scale < 128.0 and batch_idx % 400 == 0):
+                if cur_grad_scale < 8.0 or (cur_grad_scale < 32.0 and batch_idx % 400 == 0):
                     scaler.update(cur_grad_scale * 2.0)
                 if cur_grad_scale < 0.01:
                     logging.warning(f"Grad scale is small: {cur_grad_scale}")
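For context, here is a minimal sketch of how this growth policy sits inside a torch.cuda.amp training loop. The tiny model, synthetic batches, and hyperparameters are illustrative placeholders, not icefall code; only the cur_grad_scale block mirrors the hunk above. As in the source, it reads GradScaler's private _scale attribute, since the built-in growth interval cannot be made to depend on the current scale. Lowering the periodic-boost ceiling from 128.0 to 32.0 caps how far the extra doubling every 400 batches can push the scale, though the commit message itself does not elaborate on the motivation.

    # Sketch of the grad-scale boosting logic; placeholders throughout,
    # except the cur_grad_scale block, which follows the diff above.
    import logging

    import torch
    from torch.cuda.amp import GradScaler, autocast

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = torch.nn.Linear(16, 1).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    scaler = GradScaler(enabled=(device == "cuda"))

    for batch_idx in range(1000):
        x = torch.randn(8, 16, device=device)
        y = torch.randn(8, 1, device=device)

        optimizer.zero_grad()
        with autocast(enabled=(device == "cuda")):
            loss = torch.nn.functional.mse_loss(model(x), y)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        if scaler.is_enabled():
            # icefall reads the private _scale tensor to drive its own
            # growth policy on top of GradScaler's default one.
            cur_grad_scale = scaler._scale.item()
            # Double a small scale (< 8.0) every batch, and a moderate one
            # every 400 batches; this commit lowers the second threshold
            # from 128.0 to 32.0.
            if cur_grad_scale < 8.0 or (
                cur_grad_scale < 32.0 and batch_idx % 400 == 0
            ):
                scaler.update(cur_grad_scale * 2.0)
            if cur_grad_scale < 0.01:
                logging.warning(f"Grad scale is small: {cur_grad_scale}")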