From 65818d16ded697d6b11c65addc002ac5faae2eaf Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Tue, 12 Apr 2022 11:48:16 +0800 Subject: [PATCH] Add more results --- egs/librispeech/ASR/RESULTS.md | 71 +++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md index ce90da356..645e24fdc 100644 --- a/egs/librispeech/ASR/RESULTS.md +++ b/egs/librispeech/ASR/RESULTS.md @@ -13,9 +13,15 @@ The WERs are: | | test-clean | test-other | comment | |-------------------------------------|------------|------------|-------------------------------------------------------------------------------| -| greedy search (max sym per frame 1) | 2.62 | 6.37 | --epoch 25, --avg 8, --max-duration 600 | -| fast beam search | 2.61 | 6.17 | --epoch 25, --avg 8, --max-duration 600 --decoding-method fast_beam_search | -| modified beam search | 2.59 | 6.19 | --epoch 25, --avg 8, --max-duration 600 --decoding-method modified_beam_search| +| greedy search (max sym per frame 1) | 2.62 | 6.37 | --epoch 25 --avg 8 --max-duration 600 | +| fast beam search | 2.61 | 6.17 | --epoch 25 --avg 8 --max-duration 600 --decoding-method fast_beam_search | +| modified beam search | 2.59 | 6.19 | --epoch 25 --avg 8 --max-duration 600 --decoding-method modified_beam_search| +| greedy search (max sym per frame 1) | 2.70 | 6.04 | --epoch 34 --avg 10 --max-duration 600 | +| fast beam search | 2.66 | 6.00 | --epoch 34 --avg 10 --max-duration 600 --decoding-method fast_beam_search | +| greedy search (max sym per frame 1) | 2.60 | 6.06 | --epoch 37 --avg 10 --max-duration 600 | +| fast beam search | 2.62 | 5.97 | --epoch 37 --avg 10 --max-duration 600 --decoding-method fast_beam_search | + + The train and decode commands are: @@ -23,7 +29,8 @@ The train and decode commands are: and: `python3 ./pruned_transducer_stateless2/decode.py --exp-dir pruned_transducer_stateless2/exp --epoch 25 --avg 8 --bpe-model ./data/lang_bpe_500/bpe.model --max-duration 600` -The Tensorboard log is at +The Tensorboard log is at (apologies, log starts +only from epoch 3). The WERs for librispeech 100 hours are: @@ -40,7 +47,9 @@ schedule is not visible due to a since-fixed bug). |-------------------------------------|------------|------------|-------------------------------------------------------| | greedy search (max sym per frame 1) | 7.12 | 18.42 | --epoch 19 --avg 8 | | greedy search (max sym per frame 1) | 6.71 | 17.77 | --epoch 29 --avg 8 | -| fast beam search | 6.58 | 17.27 | --epoch 19 --avg 8 --decoding-method fast_beam_search | +| greedy search (max sym per frame 1) | 6.64 | 17.19 | --epoch 39 --avg 10 | +| fast beam search | 6.58 | 17.27 | --epoch 29 --avg 8 --decoding-method fast_beam_search | +| fast beam search | 6.53 | 16.82 | --epoch 39 --avg 10 --decoding-method fast_beam_search | Trained with two jobs: `python3 ./pruned_transducer_stateless2/train.py --exp-dir=pruned_transducer_stateless2/exp_100h_ws2 --world-size 2 --num-epochs 40 --full-libri 0 --max-duration 300` @@ -52,9 +61,9 @@ The Tensorboard log is at . +Train command was +`python3 ./pruned_transducer_stateless2/train.py --exp-dir=pruned_transducer_stateless2/exp_100h_fp16 --world-size 1 --num-epochs 40 --full-libri 0 --max-duration 500 --use-fp16 True` + +The Tensorboard log is at + +| | test-clean | test-other | comment | +|-------------------------------------|------------|------------|-----------------------| +| greedy search (max sym per frame 1) | 7.10 | 18.79 | --epoch 19 --avg 8 | +| greedy search (max sym per frame 1) | 6.92 | 18.16 | --epoch 29 --avg 8 | +| greedy search (max sym per frame 1) | 6.89 | 17.75 | --epoch 30 --avg 10 | + +https://tensorboard.dev/experiment/Km7QBHYnSLWs4qQnAJWsaA/ @@ -91,11 +116,11 @@ The WERs are: | | test-clean | test-other | comment | |-------------------------------------|------------|------------|------------------------------------------| -| greedy search (max sym per frame 1) | 2.62 | 6.37 | --epoch 42, --avg 11, --max-duration 100 | -| greedy search (max sym per frame 2) | 2.62 | 6.37 | --epoch 42, --avg 11, --max-duration 100 | -| greedy search (max sym per frame 3) | 2.62 | 6.37 | --epoch 42, --avg 11, --max-duration 100 | -| modified beam search (beam size 4) | 2.56 | 6.27 | --epoch 42, --avg 11, --max-duration 100 | -| beam search (beam size 4) | 2.57 | 6.27 | --epoch 42, --avg 11, --max-duration 100 | +| greedy search (max sym per frame 1) | 2.62 | 6.37 | --epoch 42 --avg 11 --max-duration 100 | +| greedy search (max sym per frame 2) | 2.62 | 6.37 | --epoch 42 --avg 11 --max-duration 100 | +| greedy search (max sym per frame 3) | 2.62 | 6.37 | --epoch 42 --avg 11 --max-duration 100 | +| modified beam search (beam size 4) | 2.56 | 6.27 | --epoch 42 --avg 11 --max-duration 100 | +| beam search (beam size 4) | 2.57 | 6.27 | --epoch 42 --avg 11 --max-duration 100 | @@ -189,7 +214,7 @@ The WERs are | | test-clean | test-other | comment | |---------------------------|------------|------------|------------------------------------------| -| greedy search | 2.85 | 6.98 | --epoch 28, --avg 15, --max-duration 100 | +| greedy search | 2.85 | 6.98 | --epoch 28 --avg 15 --max-duration 100 | The training command for reproducing is given below: @@ -249,8 +274,8 @@ The WERs are | | test-clean | test-other | comment | |-------------------------------------|------------|------------|------------------------------------------| -| greedy search (max sym per frame 1) | 2.64 | 6.55 | --epoch 39, --avg 15, --max-duration 100 | -| modified beam search (beam size 4) | 2.61 | 6.46 | --epoch 39, --avg 15, --max-duration 100 | +| greedy search (max sym per frame 1) | 2.64 | 6.55 | --epoch 39 --avg 15 --max-duration 100 | +| modified beam search (beam size 4) | 2.61 | 6.46 | --epoch 39 --avg 15 --max-duration 100 | The training command for reproducing is given below: @@ -319,10 +344,10 @@ The WERs are | | test-clean | test-other | comment | |-------------------------------------|------------|------------|------------------------------------------| -| greedy search (max sym per frame 1) | 2.67 | 6.67 | --epoch 63, --avg 19, --max-duration 100 | -| greedy search (max sym per frame 2) | 2.67 | 6.67 | --epoch 63, --avg 19, --max-duration 100 | -| greedy search (max sym per frame 3) | 2.67 | 6.67 | --epoch 63, --avg 19, --max-duration 100 | -| modified beam search (beam size 4) | 2.67 | 6.57 | --epoch 63, --avg 19, --max-duration 100 | +| greedy search (max sym per frame 1) | 2.67 | 6.67 | --epoch 63 --avg 19 --max-duration 100 | +| greedy search (max sym per frame 2) | 2.67 | 6.67 | --epoch 63 --avg 19 --max-duration 100 | +| greedy search (max sym per frame 3) | 2.67 | 6.67 | --epoch 63 --avg 19 --max-duration 100 | +| modified beam search (beam size 4) | 2.67 | 6.57 | --epoch 63 --avg 19 --max-duration 100 | The training command for reproducing is given below: