From 209acaf6e4985f01448e135153bc8ebdf9a60a52 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sat, 9 Jul 2022 11:36:54 +0800 Subject: [PATCH] Increase lr_update_period to 200. The update takes about 2 minutes, for the entire model. --- egs/librispeech/ASR/pruned_transducer_stateless7/optim.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py index 61be0ddb7..00d63d2e4 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py @@ -76,7 +76,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of param_min_rms=1.0e-05, param_max_rms=2.0, size_update_period=4, - lr_update_period=20, + lr_update_period=200, grad_cov_period=3, ): @@ -190,7 +190,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of # param_cov_{dim} is the averaged-over-time gradient of parameters on this dimension, treating # all other dims as a batch axis. - state[f"param_cov_{dim}"] = torch.zeros(size, size, **kwargs) + state[f"param_cov_{dim}"] = torch.zeros(size, size, **kwargs) # grad_cov_{dim} is the covariance of gradients on this axis (without # any co-ordinate changes), treating all other axes as as a batch axis. @@ -205,9 +205,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of state[f"grad_cov_{dim}"] = torch.zeros(size, size, **kwargs) - step = state["step"] - delta = state["delta"] delta.mul_(beta1) numel = p.numel()