diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index 49c205108..b4d48775b 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -395,7 +395,7 @@ class NeutralGradient(Optimizer):

         cur_grad = self._change_coordinates(cur_grad, state, forward=False)

-        if random.random() < 0.0002:
+        if random.random() < 0.00005:
             # This is only for debug.  The logic below would not be valid for n_cache_grads>0,
             # anyway we will delete this code at some point.
             # in principle, the cur_grad is supposed to have the same rms as params, on average.
@@ -407,7 +407,7 @@ class NeutralGradient(Optimizer):
             param_rms = (p**2).mean().sqrt()
             print(f"cur_grad_rms={cur_grad_rms.item():.3e}, corrected_grad_rms={cur_grad_rms_corrected.item():.3e}, param_rms={param_rms.item():.3e}")

-        if random.random() < 0.001:
+        if random.random() < 0.0005:
             # check the cosine angle between cur_grad and grad, to see how different this update
             # is from gradient descent.
             prod = (grad*cur_grad).mean()
@@ -735,7 +735,7 @@ class NeutralGradient(Optimizer):
             R_scale = R.diag().mean() + 1.0e-20
             cov_scale = cov.diag().mean() + 1.0e-20

-            if random.random() < 0.1:
+            if random.random() < 0.02:
                 print(f"required_rank={required_rank}, rank={rank}, size={size}, R_scale={R_scale}, rand_scale={rand_scale}, shape={shape}")

             cov.add_(R, alpha=rand_scale * cov_scale / R_scale)
@@ -816,7 +816,7 @@ class NeutralGradient(Optimizer):

             P = torch.matmul(Y, Y.t())

-            if random.random() < 0.1:
+            if random.random() < 0.025:
                 # TEMP:
                 _,s,_ = P.svd()

@@ -909,7 +909,7 @@ class NeutralGradient(Optimizer):
             param_periods = param_periods.tolist()
         logging.info(f"NeutralGradient._recalibrate_speedup: speedup = {speedup:.2g}, actual_speedup = {actual_speedup:.2g}")

-        print_info = random.random() < 0.05
+        print_info = random.random() < 0.01
         i = 0
         for p in group["params"]:
             if p.grad is None:
@@ -1150,7 +1150,7 @@ class Cain(Optimizer):
                 this_delta = grad / denom
                 alpha = -lr*(1-beta1)*(bias_correction2 ** 0.5)
                 delta.add_(this_delta, alpha=alpha)
-                if random.random() < 0.0005:
+                if random.random() < 0.0001:
                     print(f"Delta rms = {(delta**2).mean().item()}, shape = {delta.shape}")
                 p.add_(delta)
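
Note on the pattern being tuned: every change above lowers the threshold in an `if random.random() < p:` guard, which fires a given diagnostic print on roughly a fraction p of optimizer steps, so reducing p cuts log volume without removing the check. A minimal standalone sketch of this throttled-debug idiom follows; the helper name is illustrative, not part of optim.py:

import random

import torch


def maybe_log_delta_rms(delta: torch.Tensor, prob: float = 0.0001) -> None:
    # Prints on average once per 1/prob calls. The stats are only
    # computed when the guard fires, so the common path stays cheap
    # enough to leave inside a per-step optimizer loop.
    if random.random() < prob:
        print(f"Delta rms = {(delta**2).mean().item()}, shape = {delta.shape}")

With prob=0.0001 the message appears about once every 10,000 steps, which is the kind of rate these hunks are dialing the existing prints down to.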