diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md
index cc9cb34ba..3290bc93c 100644
--- a/egs/librispeech/ASR/RESULTS.md
+++ b/egs/librispeech/ASR/RESULTS.md
@@ -556,9 +556,9 @@ Number of model parameters 118129516 (i.e, 118.13 M).
 
 | | test-clean | test-other | comment |
 |-------------------------------------|------------|------------|----------------------------------------|
-| greedy search (max sym per frame 1) | 2.39 | 5.57 | --epoch 39 --avg 7 --max-duration 600 |
-| modified beam search | 2.35 | 5.50 | --epoch 39 --avg 7 --max-duration 600 |
-| fast beam search | 2.38 | 5.50 | --epoch 39 --avg 7 --max-duration 600 |
+| greedy search (max sym per frame 1) | 2.43 | 5.72 | --epoch 30 --avg 10 --max-duration 600 |
+| modified beam search | 2.43 | 5.69 | --epoch 30 --avg 10 --max-duration 600 |
+| fast beam search | 2.43 | 5.67 | --epoch 30 --avg 10 --max-duration 600 |
 
 The training commands are:
 
@@ -567,8 +567,8 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
 
 ./pruned_transducer_stateless5/train.py \
   --world-size 8 \
-  --num-epochs 40 \
-  --start-epoch 0 \
+  --num-epochs 30 \
+  --start-epoch 1 \
   --full-libri 1 \
   --exp-dir pruned_transducer_stateless5/exp-L \
   --max-duration 300 \
@@ -582,15 +582,15 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
 ```
 
 The tensorboard log can be found at
-
+
 
 The decoding commands are:
 
 ```bash
 for method in greedy_search modified_beam_search fast_beam_search; do
   ./pruned_transducer_stateless5/decode.py \
-    --epoch 39 \
-    --avg 7 \
+    --epoch 30 \
+    --avg 10 \
     --exp-dir ./pruned_transducer_stateless5/exp-L \
     --max-duration 600 \
     --decoding-method $method \
@@ -600,13 +600,14 @@ for method in greedy_search modified_beam_search fast_beam_search; do
     --nhead 8 \
     --encoder-dim 512 \
     --decoder-dim 512 \
-    --joiner-dim 512
+    --joiner-dim 512 \
+    --use-averaged-model True
 done
 ```
 
 You can find a pretrained model, training logs, decoding logs, and decoding results at:
-
+
 
 
 #### Medium
 
@@ -615,9 +616,9 @@ Number of model parameters 30896748 (i.e, 30.9 M).
 
 | | test-clean | test-other | comment |
 |-------------------------------------|------------|------------|-----------------------------------------|
-| greedy search (max sym per frame 1) | 2.88 | 6.69 | --epoch 39 --avg 17 --max-duration 600 |
-| modified beam search | 2.83 | 6.59 | --epoch 39 --avg 17 --max-duration 600 |
-| fast beam search | 2.83 | 6.61 | --epoch 39 --avg 17 --max-duration 600 |
+| greedy search (max sym per frame 1) | 2.87 | 6.92 | --epoch 30 --avg 10 --max-duration 600 |
+| modified beam search | 2.83 | 6.75 | --epoch 30 --avg 10 --max-duration 600 |
+| fast beam search | 2.81 | 6.76 | --epoch 30 --avg 10 --max-duration 600 |
 
 The training commands are:
 
@@ -626,8 +627,8 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
 
 ./pruned_transducer_stateless5/train.py \
   --world-size 8 \
-  --num-epochs 40 \
-  --start-epoch 0 \
+  --num-epochs 30 \
+  --start-epoch 1 \
   --full-libri 1 \
   --exp-dir pruned_transducer_stateless5/exp-M \
   --max-duration 300 \
@@ -641,15 +642,15 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
 ```
 
 The tensorboard log can be found at
-
+
 
 The decoding commands are:
 
 ```bash
 for method in greedy_search modified_beam_search fast_beam_search; do
   ./pruned_transducer_stateless5/decode.py \
-    --epoch 39 \
-    --avg 17 \
+    --epoch 30 \
+    --avg 10 \
     --exp-dir ./pruned_transducer_stateless5/exp-M \
     --max-duration 600 \
     --decoding-method $method \
@@ -659,13 +660,14 @@ for method in greedy_search modified_beam_search fast_beam_search; do
     --nhead 4 \
     --encoder-dim 256 \
     --decoder-dim 512 \
-    --joiner-dim 512
+    --joiner-dim 512 \
+    --use-averaged-model True
 done
 ```
 
 You can find a pretrained model, training logs, decoding logs, and decoding results at:
-
+
 
 
 #### Baseline-2
 
@@ -675,19 +677,19 @@ layers (24 v.s 12) but a narrower model (1536 feedforward dim and 384 encoder di
 
 | | test-clean | test-other | comment |
 |-------------------------------------|------------|------------|-----------------------------------------|
-| greedy search (max sym per frame 1) | 2.41 | 5.70 | --epoch 31 --avg 17 --max-duration 600 |
-| modified beam search | 2.41 | 5.69 | --epoch 31 --avg 17 --max-duration 600 |
-| fast beam search | 2.41 | 5.69 | --epoch 31 --avg 17 --max-duration 600 |
+| greedy search (max sym per frame 1) | 2.54 | 5.72 | --epoch 30 --avg 10 --max-duration 600 |
+| modified beam search | 2.47 | 5.71 | --epoch 30 --avg 10 --max-duration 600 |
+| fast beam search | 2.50 | 5.72 | --epoch 30 --avg 10 --max-duration 600 |
 
 ```bash
 export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
 
 ./pruned_transducer_stateless5/train.py \
   --world-size 8 \
-  --num-epochs 40 \
-  --start-epoch 0 \
+  --num-epochs 30 \
+  --start-epoch 1 \
   --full-libri 1 \
-  --exp-dir pruned_transducer_stateless5/exp \
+  --exp-dir pruned_transducer_stateless5/exp-B \
   --max-duration 300 \
   --use-fp16 0 \
   --num-encoder-layers 24 \
@@ -699,18 +701,15 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
 ```
 
 The tensorboard log can be found at
-
-
-**Caution**: The training script is updated so that epochs are counted from 1
-after the training.
+
 
 The decoding commands are:
 
 ```bash
 for method in greedy_search modified_beam_search fast_beam_search; do
   ./pruned_transducer_stateless5/decode.py \
-    --epoch 31 \
-    --avg 17 \
+    --epoch 30 \
+    --avg 10 \
     --exp-dir ./pruned_transducer_stateless5/exp-M \
     --max-duration 600 \
     --decoding-method $method \
@@ -720,13 +719,14 @@ for method in greedy_search modified_beam_search fast_beam_search; do
     --nhead 8 \
     --encoder-dim 384 \
     --decoder-dim 512 \
-    --joiner-dim 512
+    --joiner-dim 512 \
+    --use-averaged-model True
 done
 ```
 
 You can find a pretrained model, training logs, decoding logs, and decoding results at:
-
+
 
 
 ### LibriSpeech BPE training results (Pruned Stateless Transducer 4)