diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py index 300f0f0f8..20d126e07 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/train.py @@ -66,7 +66,7 @@ from lhotse.cut import Cut from lhotse.dataset.sampling.base import CutSampler from lhotse.utils import fix_random_seed from model import Transducer -from optim import Eden, NeutralGradient +from optim import Eden, PrAdam from torch import Tensor from torch.cuda.amp import GradScaler from torch.nn.parallel import DistributedDataParallel as DDP @@ -926,9 +926,8 @@ def run(rank, world_size, args): logging.info("Using DDP") model = DDP(model, device_ids=[rank]) - optimizer = NeutralGradient(model.parameters(), - lr=params.initial_lr, - lr_for_speedup=params.initial_lr) + optimizer = PrAdam(model.parameters(), + lr=params.initial_lr) scheduler = Eden(optimizer, params.lr_batches, params.lr_epochs)