diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py
index 0c299c744..33bc35195 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py
@@ -1013,6 +1013,7 @@ def train_one_epoch(
             scaler.scale(loss).backward()
         else:
             logging.warning(f"Grad scale is small: {cur_grad_scale}")
+        if params.multi_optim and batch_idx % params.accum_grads == 0:
             set_batch_count(model, params.batch_idx_train)
             scheduler_enc.step_batch(params.batch_idx_train)
             scheduler_dec.step_batch(params.batch_idx_train)
diff --git a/egs/librispeech/ASR/run_v3.sh b/egs/librispeech/ASR/run_v3.sh
index d46407a42..8259523de 100755
--- a/egs/librispeech/ASR/run_v3.sh
+++ b/egs/librispeech/ASR/run_v3.sh
@@ -36,13 +36,12 @@ else
     --enable-spec-aug False \
     --multi-optim True \
     --world-size 4 \
+    --start-batch 34000 \
     --num-epochs 30 \
-    --start-epoch 6 \
     --full-libri 1 \
     --exp-dir ./pruned_transducer_stateless_d2v_v2/$1 \
     --max-duration 150 \
     --freeze-finetune-updates 2000 \
-    --use-fp16 1 \
     --peak-enc-lr 0.001 \
     --peak-dec-lr 0.5 \
     --accum-grads 3 \
@@ -55,3 +54,6 @@
     --context-size 2 \
     --ctc-loss-scale 0.2
 fi
+
+#--start-epoch 6 \
+#--use-fp16 1 \
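Note on the train.py hunk: with `--multi-optim True`, the new guard advances the model's batch count and the per-module LR schedulers only on every `--accum-grads`-th batch, so gradients from the intervening batches accumulate before a real update. Below is a minimal sketch of the update step such a guard typically paces, assuming two optimizers behind one shared AMP GradScaler; the names `optimizer_enc` and `optimizer_dec` and everything past the scheduler calls are assumptions for illustration, not code from this repo.

```python
# Hypothetical sketch (not this repo's exact code): stepping two optimizers
# under gradient accumulation with torch.cuda.amp.GradScaler.
def maybe_step(params, model, scaler, batch_idx,
               optimizer_enc, optimizer_dec,
               scheduler_enc, scheduler_dec, set_batch_count):
    # scaler.scale(loss).backward() has already run for this batch, so
    # gradients from up to `accum_grads` batches are sitting in .grad.
    if params.multi_optim and batch_idx % params.accum_grads == 0:
        set_batch_count(model, params.batch_idx_train)
        scheduler_enc.step_batch(params.batch_idx_train)  # encoder LR, peak 0.001
        scheduler_dec.step_batch(params.batch_idx_train)  # decoder LR, peak 0.5
        for opt in (optimizer_enc, optimizer_dec):
            scaler.step(opt)   # unscales grads; skips the step on inf/nan
            opt.zero_grad()
        scaler.update()        # one loss-scale update per real optimizer step
```

With `--accum-grads 3` and `--max-duration 150`, each real step then sees roughly 450 seconds of audio per GPU. The run_v3.sh change is the resume side of the same run: restarting from the batch-34000 checkpoint via `--start-batch` instead of `--start-epoch 6`, with fp16 disabled for now.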