From 146626bb85d6ca8e3b38802a1751c513a84b6b8a Mon Sep 17 00:00:00 2001
From: Daniel Povey
Date: Sat, 22 Oct 2022 17:44:21 +0800
Subject: [PATCH] Renaming in optim.py; remove step() from scan_pessimistic_batches_for_oom in train.py

---
 .../ASR/pruned_transducer_stateless7/optim.py | 10 +++++-----
 .../ASR/pruned_transducer_stateless7/train.py |  1 -
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
index 8f13a67c5..d188ad123 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/optim.py
@@ -562,10 +562,10 @@ class LRScheduler(object):
         self.verbose = verbose
 
         for group in optimizer.param_groups:
-            group.setdefault("initial_lr", group["lr"])
+            group.setdefault("base_lr", group["lr"])
 
         self.base_lrs = [
-            group["initial_lr"] for group in optimizer.param_groups
+            group["base_lr"] for group in optimizer.param_groups
         ]
 
         self.epoch = 0
@@ -647,13 +647,13 @@ class Eden(LRScheduler):
     """
     Eden scheduler.
     The basic formula (before warmup) is:
-      lr = initial_lr * (((batch**2 + lr_batches**2) / lr_batches**2) ** -0.25 *
-             (((epoch**2 + lr_epochs**2) / lr_epochs**2) ** -0.25)) * warmup
+      lr = base_lr * (((batch**2 + lr_batches**2) / lr_batches**2) ** -0.25 *
+             (((epoch**2 + lr_epochs**2) / lr_epochs**2) ** -0.25)) * warmup
 
     where `warmup` increases from linearly 0.5 to 1 over `warmup_batches` batches
     and then stays constant at 1.
 
-    E.g. suggest initial-lr = 0.04 (passed to optimizer) if used with ScaledAdam
+    E.g. suggest base_lr = 0.04 (passed to optimizer) if used with ScaledAdam
 
     Args:
       optimizer: the optimizer to change the learning rates on
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
index 1fd059b79..e02bc9182 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py
@@ -1156,7 +1156,6 @@ def scan_pessimistic_batches_for_oom(
                 is_training=True,
             )
             loss.backward()
-            optimizer.step()
             optimizer.zero_grad()
         except Exception as e:
             if "CUDA out of memory" in str(e):
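
For reference, below is a minimal standalone sketch of the learning-rate formula quoted in the Eden docstring above, using the renamed `base_lr`. The function name `eden_lr` and the default values for `lr_batches`, `lr_epochs`, and `warmup_batches` are illustrative assumptions, not part of this patch or the Eden class itself.

# Minimal sketch of the Eden formula from the docstring above (not the
# scheduler class). Names and default values here are illustrative only.
def eden_lr(
    base_lr: float,
    batch: int,
    epoch: int,
    lr_batches: float = 5000.0,
    lr_epochs: float = 6.0,
    warmup_batches: float = 500.0,
) -> float:
    # Decay factors in batch and epoch, each falling off as a -0.25 power.
    batch_factor = ((batch**2 + lr_batches**2) / lr_batches**2) ** -0.25
    epoch_factor = ((epoch**2 + lr_epochs**2) / lr_epochs**2) ** -0.25
    # Warmup rises linearly from 0.5 to 1 over `warmup_batches`, then stays at 1.
    warmup = min(1.0, 0.5 + 0.5 * batch / warmup_batches)
    return base_lr * batch_factor * epoch_factor * warmup


# Example: with base_lr = 0.04 (the value suggested for ScaledAdam), the rate
# starts at half of base_lr and then decays smoothly with batches and epochs.
print(eden_lr(base_lr=0.04, batch=0, epoch=0))      # 0.02 (warmup floor)
print(eden_lr(base_lr=0.04, batch=10000, epoch=4))  # smaller, post-warmup value

This also illustrates why the rename matters: the value stored per param group under the new "base_lr" key is the peak rate the schedule decays from, not an "initial" rate in the literal sense, since warmup starts at half of it.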