From 790e8c4ba9b60733d8be2d8bde9698775b1a1253 Mon Sep 17 00:00:00 2001
From: Daniel Povey <dpovey@gmail.com>
Date: Sat, 30 Jul 2022 19:20:36 -0700
Subject: [PATCH] Changes that should not really affect the results, just
 cleanup.

---
 .../ASR/pruned_transducer_stateless7/optim.py | 29 ++-----------------
 1 file changed, 3 insertions(+), 26 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index 71c2a0c39..0335eca83 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -737,22 +737,9 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
         if the parameter covariance is just the gradient covariance to some power, this
         function does no smoothing; but if it is highly off-diagonal we do more smoothing.
         """
-        P_prime_diag = _diag(P_prime)  # (batch_size, num_blocks, block_size)
-        eps = 1.0e-10
-        P_prime_diag = (P_prime_diag + eps) / P_prime_diag.mean()
-        # make sure no diagonal element is close to zero.. we don't expect this
-        # would happen.  this is likely not important.  Note, this just used for
-        # normalizing P prior to smoothing.
-        P_prime_diag.clamp_(min=0.01)
-        P_prime_rms = P_prime_diag.sqrt()
-        P_prime_scale = P_prime_rms.unsqueeze(-1) * P_prime_rms.unsqueeze(-2)
-
-        # P_norm will have diagonal elements close to 1.  We do some smoothing
-        # in this space.
-        P_norm = P_prime / P_prime_scale
-        # Now P is as normalized as we can make it... do smoothing baserd on 'rank',
+        # do smoothing based on 'rank',
         # that is intended to compensate for bad estimates of P.
-        (batch_size, num_blocks, block_size, block_size) = P_norm.shape
+        (batch_size, num_blocks, block_size, block_size) = P_prime.shape
         # `rank_per_block` is the rank of each block of P_prime if we were to estimate it from just one
         # parameter tensor.  We average it over time, but actually it won't be changing
         # too much, so `rank` does tell us something.
@@ -771,21 +758,11 @@ param_rms_smooth1: Smoothing proportion for parameter matrix, if assumed rank of
         smooth = smooth0 * block_size / ((smooth0/smooth1 - 1) * rank + block_size)
         if True:
             logging.info(f"block size={block_size}, rank={rank}, smooth={smooth}")
+
         # add rank-dependent smoothing amount to diagonal of P_prime.  _diag() returns an aliased tensor.
         # we don't need to multiply `smooth` by anything, because at this point, P_prime should have
         # diagonal elements close to 1.
 
-        #_diag(P_norm).add_(smooth)
-
-        #P_norm = self._smooth_cov(P_norm,
-        #                          group["cov_min"][0],
-        #                          group["cov_max"][0],
-        #                          group["cov_pow"][0])
-
-        # Remove the diagonal preconditioning on P_norm, giving us stage-1-smoothed
-        # version of P_prime.
-        P_prime = P_norm * P_prime_scale
-
         P_prime_diag = _diag(P_prime)
         P_prime_diag_mean = _mean(P_prime_diag, exclude_dims=[0], keepdim=True)
         P_prime_diag += smooth * P_prime_diag_mean