diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py index 197fa5ba1..7afb29690 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py @@ -67,7 +67,7 @@ class LearnedGradient(Optimizer): param_min_rms=1.0e-05, param_max_rms=2.0, lr_mat_min=0.01, - lr_mat_max=4.0, + lr_mat_max=10.0, lr_est_period=2, diagonalize_period=4, ): @@ -474,7 +474,8 @@ class LearnedGradient(Optimizer): # are the final changes, the only 2 we make in this loop that have # side effects. - # delta_scale < 1 will make it update the learning rates faster than it otherwise would. + # delta_scale < 1 will make it update the learning rates faster than it otherwise would, + # as we'll reach equilibrium with M less rapidly. delta_scale=1.0 delta.add_(this_delta, alpha=-delta_scale*meta_lr*(1-beta1)) # there is no momentum on Q.