Revert optim schedule

This commit is contained in:
Daniel Povey 2023-05-18 15:35:23 +08:00
parent 299482d02d
commit cdfa388ac0
2 changed files with 2 additions and 11 deletions

View File

@@ -875,16 +875,7 @@ class LearnedDownsamplingModule(nn.Module):
if random.random() < 0.01 or __name__ == '__main__':
logging.info(f"mean weight={weights.mean()}, mean-abs-scores={scores.abs().mean()} positive-scores={(scores>0).to(torch.float32).mean()}, discarded-weights={weights_discarded.mean()}, seq_len={seq_len}, seq_len_reduced={seq_len_reduced}")
# randomly rotate `weights_discarded` on the sequence axis; this is
# intended to ensure that it doesn't assign the highest scores to
# not-so-important elements to avoid the randomness of these
# discarded weights.
r = random.randint(0, seq_len_reduced - 1)
weights_discarded = torch.cat((weights_discarded[:, r:],
weights_discarded[:, :r]),
dim=1)
weights_discarded = weights_discarded.flip(dims=1)
weights = (weights[:, :seq_len_reduced] - weights_discarded)
else:

View File

@@ -882,7 +882,7 @@ class Eden(LRScheduler):
warmup_factor = (
1.0
if self.batch >= self.warmup_batches
else 0.1 + 0.9 * (self.batch / self.warmup_batches)
else 0.5 + 0.5 * (self.batch / self.warmup_batches)
)
return [x * factor * warmup_factor for x in self.base_lrs]