mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 18:24:18 +00:00
Update RESULTS.
This commit is contained in:
parent
e1936fa5a8
commit
3b93761199
@ -74,24 +74,53 @@ jobs:
|
||||
mkdir tmp
|
||||
cd tmp
|
||||
git lfs install
|
||||
git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10
|
||||
git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07
|
||||
cd ..
|
||||
tree tmp
|
||||
soxi tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/*.wav
|
||||
ls -lh tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/*.wav
|
||||
soxi tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/*.wav
|
||||
ls -lh tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/*.wav
|
||||
|
||||
- name: Run greedy search decoding
|
||||
- name: Run greedy search decoding (max-sym-per-frame 1)
|
||||
shell: bash
|
||||
run: |
|
||||
export PYTHONPATH=$PWD:PYTHONPATH
|
||||
cd egs/librispeech/ASR
|
||||
./transducer_stateless/pretrained.py \
|
||||
--method greedy_search \
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0002.wav
|
||||
--max-sym-per-frame 1 \
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||
|
||||
- name: Run greedy search decoding (max-sym-per-frame 2)
|
||||
shell: bash
|
||||
run: |
|
||||
export PYTHONPATH=$PWD:PYTHONPATH
|
||||
cd egs/librispeech/ASR
|
||||
./transducer_stateless/pretrained.py \
|
||||
--method greedy_search \
|
||||
--max-sym-per-frame 2 \
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||
|
||||
- name: Run greedy search decoding (max-sym-per-frame 3)
|
||||
shell: bash
|
||||
run: |
|
||||
export PYTHONPATH=$PWD:PYTHONPATH
|
||||
cd egs/librispeech/ASR
|
||||
./transducer_stateless/pretrained.py \
|
||||
--method greedy_search \
|
||||
--max-sym-per-frame 3 \
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||
|
||||
- name: Run beam search decoding
|
||||
shell: bash
|
||||
@ -101,8 +130,22 @@ jobs:
|
||||
./transducer_stateless/pretrained.py \
|
||||
--method beam_search \
|
||||
--beam-size 4 \
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0002.wav
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||
|
||||
- name: Run modified beam search decoding
|
||||
shell: bash
|
||||
run: |
|
||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||
cd egs/librispeech/ASR
|
||||
./transducer_stateless/pretrained.py \
|
||||
--method modified_beam_search \
|
||||
--beam-size 4 \
|
||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||
|
@ -80,16 +80,16 @@ We provide a Colab notebook to run a pre-trained RNN-T conformer model: [](https://colab.research.google.com/drive/1Rc4Is-3Yp9LbcEz_Iy8hfyenyHsyjvqE?usp=sharing)
|
||||
We provide a Colab notebook to run a pre-trained transducer conformer + stateless decoder model: [](https://colab.research.google.com/drive/1CO1bXJ-2khDckZIW8zjOPHGSKLHpTDlp?usp=sharing)
|
||||
|
||||
### Aishell
|
||||
|
||||
|
@ -4,62 +4,73 @@
|
||||
|
||||
#### Conformer encoder + embedding decoder
|
||||
|
||||
Using commit `4c1b3665ee6efb935f4dd93a80ff0e154b13efb6`.
|
||||
Using commit `TODO`.
|
||||
|
||||
Conformer encoder + non-recurrent decoder. The decoder
|
||||
contains only an embedding layer and a Conv1d (with kernel size 2).
|
||||
|
||||
The WERs are
|
||||
|
||||
| | test-clean | test-other | comment |
|
||||
|---------------------------|------------|------------|------------------------------------------|
|
||||
| greedy search | 2.69 | 6.81 | --epoch 71, --avg 15, --max-duration 100 |
|
||||
| beam search (beam size 4) | 2.68 | 6.72 | --epoch 71, --avg 15, --max-duration 100 |
|
||||
| | test-clean | test-other | comment |
|
||||
|-------------------------------------|------------|------------|------------------------------------------|
|
||||
| greedy search (max sym per frame 1) | 2.68 | 6.71 | --epoch 61, --avg 18, --max-duration 100 |
|
||||
| greedy search (max sym per frame 2) | 2.69 | 6.71 | --epoch 61, --avg 18, --max-duration 100 |
|
||||
| greedy search (max sym per frame 3) | 2.69 | 6.71 | --epoch 61, --avg 18, --max-duration 100 |
|
||||
| modified beam search (beam size 4) | 2.67 | 6.64 | --epoch 61, --avg 18, --max-duration 100 |
|
||||
|
||||
|
||||
The training command for reproducing is given below:
|
||||
|
||||
```
|
||||
cd egs/librispeech/ASR/
|
||||
./prepare.sh
|
||||
export CUDA_VISIBLE_DEVICES="0,1,2,3"
|
||||
|
||||
./transducer_stateless/train.py \
|
||||
--world-size 4 \
|
||||
--num-epochs 76 \
|
||||
--start-epoch 0 \
|
||||
--exp-dir transducer_stateless/exp-full \
|
||||
--full-libri 1 \
|
||||
--max-duration 250 \
|
||||
--lr-factor 3
|
||||
--max-duration 300 \
|
||||
--lr-factor 5 \
|
||||
--bpe-model data/lang_bpe_500/bpe.model \
|
||||
--modified-transducer-prob 0.25
|
||||
```
|
||||
|
||||
The tensorboard training log can be found at
|
||||
<https://tensorboard.dev/experiment/qGdqzHnxS0WJ695OXfZDzA/#scalars&_smoothingWeight=0>
|
||||
<https://tensorboard.dev/experiment/qgvWkbF2R46FYA6ZMNmOjA/#scalars>
|
||||
|
||||
The decoding command is:
|
||||
```
|
||||
epoch=71
|
||||
avg=15
|
||||
epoch=61
|
||||
avg=18
|
||||
|
||||
## greedy search
|
||||
./transducer_stateless/decode.py \
|
||||
--epoch $epoch \
|
||||
--avg $avg \
|
||||
--exp-dir transducer_stateless/exp-full \
|
||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||
--max-duration 100
|
||||
for sym in 1 2 3; do
|
||||
./transducer_stateless/decode.py \
|
||||
--epoch $epoch \
|
||||
--avg $avg \
|
||||
--exp-dir transducer_stateless/exp-full \
|
||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||
--max-duration 100 \
|
||||
--max-sym-per-frame $sym
|
||||
done
|
||||
|
||||
## modified beam search
|
||||
|
||||
## beam search
|
||||
./transducer_stateless/decode.py \
|
||||
--epoch $epoch \
|
||||
--avg $avg \
|
||||
--exp-dir transducer_stateless/exp-full \
|
||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||
--max-duration 100 \
|
||||
--decoding-method beam_search \
|
||||
--context-size 2 \
|
||||
--decoding-method modified_beam_search \
|
||||
--beam-size 4
|
||||
```
|
||||
|
||||
You can find a pretrained model by visiting
|
||||
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10>
|
||||
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07>
|
||||
|
||||
|
||||
#### Conformer encoder + LSTM decoder
|
||||
|
@ -22,10 +22,11 @@ Usage:
|
||||
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||
--method greedy_search \
|
||||
--max-sym-per-frame 1 \
|
||||
/path/to/foo.wav \
|
||||
/path/to/bar.wav \
|
||||
|
||||
(1) beam search
|
||||
(2) beam search
|
||||
./transducer_stateless/pretrained.py \
|
||||
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||
@ -34,6 +35,15 @@ Usage:
|
||||
/path/to/foo.wav \
|
||||
/path/to/bar.wav \
|
||||
|
||||
(3) modified beam search
|
||||
./transducer_stateless/pretrained.py \
|
||||
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||
--method modified_beam_search \
|
||||
--beam-size 4 \
|
||||
/path/to/foo.wav \
|
||||
/path/to/bar.wav \
|
||||
|
||||
You can also use `./transducer_stateless/exp/epoch-xx.pt`.
|
||||
|
||||
Note: ./transducer_stateless/exp/pretrained.pt is generated by
|
||||
@ -50,7 +60,7 @@ import kaldifeat
|
||||
import sentencepiece as spm
|
||||
import torch
|
||||
import torchaudio
|
||||
from beam_search import beam_search, greedy_search
|
||||
from beam_search import beam_search, greedy_search, modified_beam_search
|
||||
from conformer import Conformer
|
||||
from decoder import Decoder
|
||||
from joiner import Joiner
|
||||
@ -90,6 +100,7 @@ def get_parser():
|
||||
help="""Possible values are:
|
||||
- greedy_search
|
||||
- beam_search
|
||||
- modified_beam_search
|
||||
""",
|
||||
)
|
||||
|
||||
@ -107,7 +118,7 @@ def get_parser():
|
||||
"--beam-size",
|
||||
type=int,
|
||||
default=4,
|
||||
help="Used only when --method is beam_search",
|
||||
help="Used only when --method is beam_search and modified_beam_search ",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@ -300,6 +311,10 @@ def main():
|
||||
hyp = beam_search(
|
||||
model=model, encoder_out=encoder_out_i, beam=params.beam_size
|
||||
)
|
||||
elif params.decoding_method == "modified_beam_search":
|
||||
hyp = modified_beam_search(
|
||||
model=model, encoder_out=encoder_out_i, beam=params.beam_size
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported method: {params.method}")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user