From 5c3e4f506f59a4d2a3e84afd58a2e16728a107d7 Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Fri, 17 Jun 2022 12:17:48 +0800
Subject: [PATCH] Reduce debug frequency

---
 egs/librispeech/ASR/pruned_transducer_stateless7/optim.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index 48db9ab79..2ee6724cf 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -301,7 +301,7 @@ class NeutralGradient(Optimizer):
 
             cur_grad = self._change_coordinates(cur_grad, state, forward=False)
 
-            if random.random() < 0.004:
+            if random.random() < 0.001:
                 # in principle, the cur_grad is supposed to have the same rms as params, on average.
                 cur_grad_rms = (cur_grad**2).mean().sqrt()
                 # _corrected corrects for the overall size of the grad, making cur_grad_rms more similar
@@ -311,7 +311,7 @@ class NeutralGradient(Optimizer):
                 param_rms = (p**2).mean().sqrt()
                 print(f"cur_grad_rms={cur_grad_rms.item():.3e}, corrected_grad_rms={cur_grad_rms_corrected.item():.3e}, param_rms={param_rms.item():.3e}")
 
-            if random.random() < 0.1:
+            if random.random() < 0.025:
                 # check the cosine angle between cur_grad and grad, to see how different this update
                 # is from gradient descent.
                 prod = (grad*cur_grad).mean()