From 1e31fbcd7def2e52a3edd7c4ead02c4b884ebfe0 Mon Sep 17 00:00:00 2001 From: marcoyang1998 <45973641+marcoyang1998@users.noreply.github.com> Date: Thu, 25 Aug 2022 12:12:50 +0800 Subject: [PATCH] Add clamping operation in Eve optimizer for all scalar weights to avoid (#550) unstable training in some scenarios. The clamping range is set to (-10, 2). Note that this change may cause unexpected effects if you resume training from a model that was trained without clamping. --- egs/librispeech/ASR/pruned_transducer_stateless2/optim.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless2/optim.py index 432bf8220..041a81f45 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/optim.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/optim.py @@ -164,6 +164,10 @@ class Eve(Optimizer): p.mul_(1 - (weight_decay * is_above_target_rms)) p.addcdiv_(exp_avg, denom, value=-step_size) + # Constrain the range of scalar weights + if p.numel() == 1: + p.clamp_(min=-10, max=2) + return loss