diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.train.py.swp b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.train.py.swp index 2a526fedd..ec987790b 100644 Binary files a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.train.py.swp and b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/.train.py.swp differ diff --git a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py index a2e8a95ab..aa8940f0e 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless_d2v_v2/train.py @@ -41,6 +41,34 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" --full-libri 1 \ --max-duration 550 +# For d2v-T training: +export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" + +./pruned_transducer_stateless_d2v_v2/train.py \ + --wandb true \ + --input-strategy AudioSamples \ + --enable-spec-aug False \ + --multi-optim True \ + --world-size 8 \ + --num-epochs 30 \ + --start-epoch 1 \ + --full-libri 0 \ + --exp-dir ./pruned_transducer_stateless_d2v_v2/$1 \ + --max-duration 250 \ + --freeze-finetune-updates 2000 \ + --use-fp16 1 \ + --peak-enc-lr 0.001 \ + --peak-dec-lr 0.05 \ + --accum-grads 1 \ + --encoder-type d2v \ + --additional-block True \ + --encoder-dim 768 \ + --decoder-dim 768 \ + --joiner-dim 768 \ + --prune-range 20 \ + --context-size 2 \ + --ctc-loss-scale 0.2 + """