diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index 62d30e65c..1ea201fce 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -93,7 +93,7 @@ class NeutralGradient(Optimizer):
             estimate_period=2000,
             stats_steps=200,
             param_pow=1.0,
-            grad_min_rand=0.2,
+            grad_min_rand=0.0,
             lr_for_speedup=0.03,
             speedup_recalibrate_period=100,
             speedup_first_step=500,
@@ -512,7 +512,8 @@ class NeutralGradient(Optimizer):
                 grad_cov = state[f"grad_cov_{dim}"] / count
                 del state[f"grad_cov_{dim}"]  # save memory
 
-                self._randomize_lowrank_cov(grad_cov, count, p.shape)
+                self._randomize_lowrank_cov(grad_cov, count, p.shape,
+                                            grad_min_rand)
 
                 param_cov = self._get_param_cov(p, dim)
 
@@ -703,22 +704,27 @@ class NeutralGradient(Optimizer):
         size = cov.shape[0]
         required_rank = int(size * 1.2) + 1
 
-        if rank  < required_rank:
+        rand_scale = min_rand
+        if rank < required_rank:
             # This epsilon is to avoid division by zero in weird situations where the
             # params are exactly zero or we sample a zero matrix; the exact value is not
             # going to affect the performance.
             eps = 1.0e-20
-            cov_scale = cov.diag().mean() + 1.0e-20
 
             # The following formula assumes that the "missing" outer products are
             # expected to be smaller than the ones that we do have, i.e. 0.2 the size.
             # We are trying to find the trace of the matrix we need to add,
             # i.e. how big it is relative to the trace of the existing matrix.
             missing_rank = (required_rank - rank)
-            rand_scale = max(min_rand, 0.2 * missing_rank / rank)
+            rand_scale = max(rand_scale, 0.2 * missing_rank / rank)
+        if rand_scale > 0.0:
             R = torch.randn(size, size, device=cov.device, dtype=cov.dtype)
             R = torch.matmul(R, R.t()) # positive semidefinite random matrix
+            rms_diag = (cov.diag() + 1.0e-20).sqrt()
+            R = R * rms_diag * rms_diag.unsqueeze(-1)
+
             R_scale = R.diag().mean() + 1.0e-20
+            cov_scale = cov.diag().mean() + 1.0e-20
             if random.random() < 0.1:
                 print(f"required_rank={required_rank}, rank={rank}, size={size}, R_scale={R_scale}, rand_scale={rand_scale}, shape={shape}")
             cov.add_(R, alpha=rand_scale * cov_scale / R_scale)