From 93dd3f588735f0fc8806a7b1ce19e460ba12e7d1 Mon Sep 17 00:00:00 2001 From: pkufool Date: Fri, 23 Jun 2023 21:16:09 +0800 Subject: [PATCH] Update results --- egs/wenetspeech/ASR/RESULTS.md | 85 ++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/egs/wenetspeech/ASR/RESULTS.md b/egs/wenetspeech/ASR/RESULTS.md index 658ad4a9b..1a0e0681f 100644 --- a/egs/wenetspeech/ASR/RESULTS.md +++ b/egs/wenetspeech/ASR/RESULTS.md @@ -1,5 +1,90 @@ ## Results +### WenetSpeech char-based training results (Non-streaming and streaming) on zipformer model + +This is the [pull request](https://github.com/k2-fsa/icefall/pull/1130) in icefall. + +#### Non-streaming + +Best results (num of params : ~76M): + +Type | Greedy(dev & net & meeting) | Beam search(dev & net & meeting) |   +-- | -- | -- | -- +Non-streaming | 7.36 & 7.65 & 12.43 | 7.32 & 7.61 & 12.35 | --epoch=12 + +The training command: + +``` +./zipformer/train.py \ + --world-size 6 \ + --num-epochs 12 \ + --use-fp16 1 \ + --max-duration 450 \ + --training-subset L \ + --lr-epochs 1.5 \ + --context-size 2 \ + --exp-dir zipformer/exp_L_context_2 \ + --causal 0 \ + --num-workers 8 +``` + +Listed best results for each epoch below: + +Epoch | Greedy search(dev & net & meeting) | Modified beam search(dev & net & meeting) |   +-- | -- | -- | -- +4 | 7.83 & 8.86 &13.73 | 7.75 & 8.81 & 13.67 | avg=1;blank-penalty=2 +5 | 7.75 & 8.46 & 13.38 | 7.68 & 8.41 & 13.27 | avg=1;blank-penalty=2 +6 | 7.72 & 8.19 & 13.16 | 7.62 & 8.14 & 13.06 | avg=1;blank-penalty=2 +7 | 7.59 & 8.08 & 12.97 | 7.53 & 8.01 & 12.87 | avg=2;blank-penalty=2 +8 | 7.68 & 7.87 & 12.96 | 7.61 & 7.81 & 12.88 | avg=1;blank-penalty=2 +9 | 7.57 & 7.77 & 12.87 | 7.5 & 7.71 & 12.77 | avg=1;blank-penalty=2 +10 | 7.45 & 7.7 & 12.69 | 7.39 & 7.63 & 12.59 | avg=2;blank-penalty=2 +11 | 7.35 & 7.67 & 12.46 | 7.31 & 7.63 & 12.43 | avg=3;blank-penalty=2 +12 | 7.36 & 7.65 & 12.43 | 7.32 & 7.61 & 12.35 | avg=4;blank-penalty=2 + +The pre-trained model is available here : https://huggingface.co/pkufool/icefall-asr-zipformer-wenetspeech-20230615 + + +#### Streaming + +Best results (num of params : ~76M): + +Type | Greedy(dev & net & meeting) | Beam search(dev & net & meeting) |   +-- | -- | -- | -- +Streaming | 8.45 & 9.89 & 16.46 | 8.21 & 9.77 & 16.07 | --epoch=12; --chunk-size=16; --left-context-frames=256 +Streaming | 8.0 & 9.0 & 15.11 | 7.84 & 8.94 & 14.92 | --epoch=12; --chunk-size=32; --left-context-frames=256 + +The training command: + +``` +./zipformer/train.py \ + --world-size 8 \ + --num-epochs 12 \ + --use-fp16 1 \ + --max-duration 450 \ + --training-subset L \ + --lr-epochs 1.5 \ + --context-size 2 \ + --exp-dir zipformer/exp_L_causal_context_2 \ + --causal 1 \ + --num-workers 8 +``` + +Best results for each epoch (--chunk-size=16; --left-context-frames=128) + +Epoch | Greedy search(dev & net & meeting) | Modified beam search(dev & net & meeting) |   +-- | -- | -- | -- +6 | 9.14 & 10.75 & 18.15 | 8.79 & 10.54 & 17.64 | avg=1;blank-penalty=1.5 +7 | 9.11 & 10.61 & 17.86 | 8.8 & 10.42 & 17.29 | avg=1;blank-penalty=1.5 +8 | 8.89 & 10.32 & 17.44 | 8.59 & 10.09 & 16.9 | avg=1;blank-penalty=1.5 +9 | 8.86 & 10.11 & 17.35 | 8.55 & 9.87 & 16.76 | avg=1;blank-penalty=1.5 +10 | 8.66 & 10.0 & 16.94 | 8.39 & 9.83 & 16.47 | avg=2;blank-penalty=1.5 +11 | 8.58 & 9.92 & 16.67 | 8.32 & 9.77 & 16.27 | avg=3;blank-penalty=1.5 +12 | 8.45 & 9.89 & 16.46 | 8.21 & 9.77 & 16.07 | avg=4;blank-penalty=1.5 + +The pre-trained model is available here: https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615 + + ### WenetSpeech char-based training results (offline and streaming) (Pruned Transducer 5) #### 2022-07-22