From 9af144c26b91065a119d4e67c03004974462d24d Mon Sep 17 00:00:00 2001 From: Zengwei Yao Date: Mon, 9 Oct 2023 23:15:22 +0800 Subject: [PATCH] Zipformer update result (#1296) * update Zipformer results --- README.md | 6 +++--- egs/librispeech/ASR/RESULTS.md | 34 +++++++++++++++++++++------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index c89e7b9aa..da446109d 100644 --- a/README.md +++ b/README.md @@ -120,9 +120,9 @@ We provide a Colab notebook to run a pre-trained transducer conformer + stateles | Encoder | Params | test-clean | test-other | |-----------------|--------|------------|------------| -| zipformer | 65.5M | 2.21 | 4.91 | -| zipformer-small | 23.2M | 2.46 | 5.83 | -| zipformer-large | 148.4M | 2.11 | 4.77 | +| zipformer | 65.5M | 2.21 | 4.79 | +| zipformer-small | 23.2M | 2.42 | 5.73 | +| zipformer-large | 148.4M | 2.06 | 4.63 | Note: No auxiliary losses are used in the training and no LMs are used in the decoding. diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md index b945f43fd..fc7fcdc26 100644 --- a/egs/librispeech/ASR/RESULTS.md +++ b/egs/librispeech/ASR/RESULTS.md @@ -75,7 +75,7 @@ See for more details. ##### normal-scaled model, number of model parameters: 65549011, i.e., 65.55 M The tensorboard log can be found at - + You can find a pretrained model, training logs, decoding logs, and decoding results at: @@ -90,18 +90,20 @@ You can use to deploy it. | greedy_search | 2.23 | 4.96 | --epoch 40 --avg 16 | | modified_beam_search | 2.21 | 4.91 | --epoch 40 --avg 16 | | fast_beam_search | 2.24 | 4.93 | --epoch 40 --avg 16 | +| greedy_search | 2.22 | 4.87 | --epoch 50 --avg 25 | +| modified_beam_search | 2.21 | 4.79 | --epoch 50 --avg 25 | +| fast_beam_search | 2.21 | 4.82 | --epoch 50 --avg 25 | | modified_beam_search_shallow_fusion | 2.01 | 4.37 | --epoch 40 --avg 16 --beam-size 12 --lm-scale 0.3 | | modified_beam_search_LODR | 1.94 | 4.17 | --epoch 40 --avg 16 --beam-size 12 --lm-scale 0.52 --LODR-scale -0.26 | | modified_beam_search_rescore | 2.04 | 4.39 | --epoch 40 --avg 16 --beam-size 12 | | modified_beam_search_rescore_LODR | 2.01 | 4.33 | --epoch 40 --avg 16 --beam-size 12 | - The training command is: ```bash export CUDA_VISIBLE_DEVICES="0,1,2,3" ./zipformer/train.py \ --world-size 4 \ - --num-epochs 40 \ + --num-epochs 50 \ --start-epoch 1 \ --use-fp16 1 \ --exp-dir zipformer/exp \ @@ -115,8 +117,8 @@ The decoding command is: export CUDA_VISIBLE_DEVICES="0" for m in greedy_search modified_beam_search fast_beam_search; do ./zipformer/decode.py \ - --epoch 30 \ - --avg 9 \ + --epoch 50 \ + --avg 25 \ --use-averaged-model 1 \ --exp-dir ./zipformer/exp \ --max-duration 600 \ @@ -129,7 +131,7 @@ To decode with external language models, please refer to the documentation [here ##### small-scaled model, number of model parameters: 23285615, i.e., 23.3 M The tensorboard log can be found at - + You can find a pretrained model, training logs, decoding logs, and decoding results at: @@ -144,13 +146,16 @@ You can use to deploy it. | greedy_search | 2.49 | 5.91 | --epoch 40 --avg 13 | | modified_beam_search | 2.46 | 5.83 | --epoch 40 --avg 13 | | fast_beam_search | 2.46 | 5.87 | --epoch 40 --avg 13 | +| greedy_search | 2.46 | 5.86 | --epoch 50 --avg 23 | +| modified_beam_search | 2.42 | 5.73 | --epoch 50 --avg 23 | +| fast_beam_search | 2.46 | 5.78 | --epoch 50 --avg 23 | The training command is: ```bash export CUDA_VISIBLE_DEVICES="0,1" ./zipformer/train.py \ --world-size 2 \ - --num-epochs 40 \ + --num-epochs 50 \ --start-epoch 1 \ --use-fp16 1 \ --exp-dir zipformer/exp-small \ @@ -169,8 +174,8 @@ The decoding command is: export CUDA_VISIBLE_DEVICES="0" for m in greedy_search modified_beam_search fast_beam_search; do ./zipformer/decode.py \ - --epoch 40 \ - --avg 13 \ + --epoch 50 \ + --avg 23 \ --exp-dir zipformer/exp-small \ --max-duration 600 \ --causal 0 \ @@ -185,7 +190,7 @@ done ##### large-scaled model, number of model parameters: 148439574, i.e., 148.4 M The tensorboard log can be found at - + You can find a pretrained model, training logs, decoding logs, and decoding results at: @@ -200,13 +205,16 @@ You can use to deploy it. | greedy_search | 2.12 | 4.8 | --epoch 40 --avg 13 | | modified_beam_search | 2.11 | 4.7 | --epoch 40 --avg 13 | | fast_beam_search | 2.13 | 4.78 | --epoch 40 --avg 13 | +| greedy_search | 2.08 | 4.69 | --epoch 50 --avg 30 | +| modified_beam_search | 2.06 | 4.63 | --epoch 50 --avg 30 | +| fast_beam_search | 2.09 | 4.68 | --epoch 50 --avg 30 | The training command is: ```bash export CUDA_VISIBLE_DEVICES="0,1,2,3" ./zipformer/train.py \ --world-size 4 \ - --num-epochs 40 \ + --num-epochs 50 \ --start-epoch 1 \ --use-fp16 1 \ --exp-dir zipformer/exp-large \ @@ -224,8 +232,8 @@ The decoding command is: export CUDA_VISIBLE_DEVICES="0" for m in greedy_search modified_beam_search fast_beam_search; do ./zipformer/decode.py \ - --epoch 40 \ - --avg 16 \ + --epoch 50 \ + --avg 30 \ --exp-dir zipformer/exp-large \ --max-duration 600 \ --causal 0 \