From cdfa388ac081097c31eddc45aecdd12ef91f583d Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Thu, 18 May 2023 15:35:23 +0800
Subject: [PATCH] Revert optim schedule

---
 egs/libriheavy/LM/zipformer1/subformer.py     | 11 +----------
 .../ASR/pruned_transducer_stateless7/optim.py |  2 +-
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/egs/libriheavy/LM/zipformer1/subformer.py b/egs/libriheavy/LM/zipformer1/subformer.py
index 071cf8ac3..e8ad00501 100644
--- a/egs/libriheavy/LM/zipformer1/subformer.py
+++ b/egs/libriheavy/LM/zipformer1/subformer.py
@@ -875,16 +875,7 @@ class LearnedDownsamplingModule(nn.Module):
 
             if random.random() < 0.01 or __name__ == '__main__':
                 logging.info(f"mean weight={weights.mean()}, mean-abs-scores={scores.abs().mean()} positive-scores={(scores>0).to(torch.float32).mean()}, discarded-weights={weights_discarded.mean()}, seq_len={seq_len}, seq_len_reduced={seq_len_reduced}")
-
-
-            # randomly rotate `weights_discarded` on the sequence axis; this is
-            # intended to ensure that it doesn't assign the highest scores to
-            # not-so-important elements to avoid the randomness of these
-            # discarded weights.
-            r = random.randint(0, seq_len_reduced - 1)
-            weights_discarded = torch.cat((weights_discarded[:, r:],
-                                           weights_discarded[:, :r]),
-                                          dim=1)
+            weights_discarded = weights_discarded.flip(dims=1)
 
             weights = (weights[:, :seq_len_reduced] - weights_discarded)
         else:
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index a609a58f8..4d99983f6 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -882,7 +882,7 @@ class Eden(LRScheduler):
         warmup_factor = (
             1.0
             if self.batch >= self.warmup_batches
-            else 0.1 + 0.9 * (self.batch / self.warmup_batches)
+            else 0.5 + 0.5 * (self.batch / self.warmup_batches)
         )
 
         return [x * factor * warmup_factor for x in self.base_lrs]
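
Two illustrative sketches follow, written against the hunks above.  They are
not part of the commit; the helper names, toy shapes, and example values in
them are made up for illustration.

The subformer.py hunk replaces the random rotation of `weights_discarded`
along the sequence axis with a deterministic flip.  A minimal sketch of the
two behaviours, assuming the weights arrive sorted in descending order as in
LearnedDownsamplingModule:

    import torch

    # (batch, seq_len_reduced); toy values, sorted descending
    weights_discarded = torch.tensor([[0.9, 0.7, 0.4, 0.2]])

    # Old behaviour: rotate by a random offset r along the sequence axis.
    r = 2  # stand-in for random.randint(0, seq_len_reduced - 1)
    rotated = torch.cat((weights_discarded[:, r:],
                         weights_discarded[:, :r]), dim=1)
    # -> tensor([[0.4000, 0.2000, 0.9000, 0.7000]])

    # New behaviour: reverse the sequence axis, so the largest discarded
    # weight lines up with the smallest kept weight in the later subtraction
    # `weights[:, :seq_len_reduced] - weights_discarded`.
    flipped = weights_discarded.flip(dims=(1,))  # equivalent to flip(dims=1)
    # -> tensor([[0.2000, 0.4000, 0.7000, 0.9000]])

The optim.py hunk reverts Eden's warmup ramp from 0.1 -> 1.0 back to
0.5 -> 1.0 over `warmup_batches`.  A sketch of the factor each formula
yields (the function names and warmup_batches=500 are arbitrary):

    def warmup_factor_old(batch: int, warmup_batches: float) -> float:
        # pre-revert: starts at a tenth of the base learning rate
        return 1.0 if batch >= warmup_batches else 0.1 + 0.9 * (batch / warmup_batches)

    def warmup_factor_new(batch: int, warmup_batches: float) -> float:
        # post-revert: starts at half the base learning rate (gentler ramp)
        return 1.0 if batch >= warmup_batches else 0.5 + 0.5 * (batch / warmup_batches)

    for batch in (0, 250, 500):
        print(batch, warmup_factor_old(batch, 500.0), warmup_factor_new(batch, 500.0))
    # batch=0   -> 0.1  vs 0.5
    # batch=250 -> 0.55 vs 0.75
    # batch=500 -> 1.0  vs 1.0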