From e2cc09a8c6bb79f33e11337f8174221be061e966 Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Sun, 31 Jul 2022 18:29:44 +0800 Subject: [PATCH] Fix issue with max_eig formula; restore cov_min[1]=0.0025. --- .../ASR/pruned_transducer_stateless7/optim.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py index 5b482c89b..494894545 100644 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py @@ -163,7 +163,7 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of lr=3e-02, betas=(0.9, 0.98), size_lr_scale=0.1, - cov_min=(0.025, 0.0, 0.02, 0.0001), + cov_min=(0.025, 0.0025, 0.02, 0.0001), cov_max=(10.0, 80.0, 5.0, 400.0), cov_pow=(1.0, 1.0, 1.0, 1.0), param_rms_smooth0=0.4, @@ -830,8 +830,8 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of # which case X is extremely tiny). # eig_ceil is the maximum possible eigenvalue that X could possibly - # have at this time. - eig_ceil = X.shape[-1] + # have at this time, equal to num_blocks * block_size. + eig_ceil = X.shape[1] * X.shape[3] # the next statement wslightly adjusts the target to be the same as # what the baseline function, eig -> 1./(1./eig + 1./max_eig) would @@ -886,10 +886,11 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of X /= mean_eig if min_eig != 0.0: - # should be inverting as block-diag.. - X += min_eig * M.inverse() + X = X * (1.0-min_eig) + min_eig * M.inverse() - eig_ceil = X.shape[-1] + # eig_ceil is the maximum possible eigenvalue that X could possibly + # have at this time, equal to num_blocks * block_size. + eig_ceil = X.shape[1] * X.shape[3] # the next statement wslightly adjusts the target to be the same as # what the baseline function, eig -> 1./(1./eig + 1./max_eig) would @@ -1859,7 +1860,7 @@ def _test_eve_cain(): fix_random_seed(42) Linear = torch.nn.Linear if iter == 0 else ScaledLinear - hidden_dim = 200 + hidden_dim = 300 m = torch.nn.Sequential(Linear(E, hidden_dim), torch.nn.PReLU(), Linear(hidden_dim, hidden_dim),