From 65bc964854219677368fa1bf4f04a2acdbd1066f Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Sat, 9 Jul 2022 10:14:20 +0800
Subject: [PATCH] Fix bug for scalar update

---
 egs/librispeech/ASR/pruned_transducer_stateless7/optim.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index 3b57c37e8..21c4145d4 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -497,7 +497,6 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
             # and project back..
             grad = self._project(grad, state, forward=False)
 
-
         scalar_exp_avg_sq = state["scalar_exp_avg_sq"]
         scalar_exp_avg_sq.mul_(beta2).add_((grad**2).mean(), alpha=1-beta2)
 
@@ -525,11 +524,10 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
                      grad: Tensor,
                      state: dict):
         """
-        A form of the core update for tensors with a small number of elements,
-        e.g. scalars. This is Adam where, if the numel() > 1, the learning rate
-        is proportional to the parameter rms value.
+        A form of the core update for scalar tensors, where we cannot get a good
+        estimate of the parameter rms.
         """
-        exp_avg_sq = state["scalar_exp_avg_sq"]
+        exp_avg_sq = state["exp_avg_sq"]
         exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1-beta2)
 
 