mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-27 02:34:21 +00:00
Update RESULTS.
This commit is contained in:
parent
e1936fa5a8
commit
3b93761199
@ -74,24 +74,53 @@ jobs:
|
|||||||
mkdir tmp
|
mkdir tmp
|
||||||
cd tmp
|
cd tmp
|
||||||
git lfs install
|
git lfs install
|
||||||
git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10
|
git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07
|
||||||
cd ..
|
cd ..
|
||||||
tree tmp
|
tree tmp
|
||||||
soxi tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/*.wav
|
soxi tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/*.wav
|
||||||
ls -lh tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/*.wav
|
ls -lh tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/*.wav
|
||||||
|
|
||||||
- name: Run greedy search decoding
|
- name: Run greedy search decoding (max-sym-per-frame 1)
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PYTHONPATH=$PWD:PYTHONPATH
|
export PYTHONPATH=$PWD:PYTHONPATH
|
||||||
cd egs/librispeech/ASR
|
cd egs/librispeech/ASR
|
||||||
./transducer_stateless/pretrained.py \
|
./transducer_stateless/pretrained.py \
|
||||||
--method greedy_search \
|
--method greedy_search \
|
||||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/exp/pretrained.pt \
|
--max-sym-per-frame 1 \
|
||||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/data/lang_bpe_500/bpe.model \
|
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1089-134686-0001.wav \
|
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0001.wav \
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0002.wav
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
- name: Run greedy search decoding (max-sym-per-frame 2)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:PYTHONPATH
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
./transducer_stateless/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame 2 \
|
||||||
|
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||||
|
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
- name: Run greedy search decoding (max-sym-per-frame 3)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:PYTHONPATH
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
./transducer_stateless/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame 3 \
|
||||||
|
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||||
|
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
- name: Run beam search decoding
|
- name: Run beam search decoding
|
||||||
shell: bash
|
shell: bash
|
||||||
@ -101,8 +130,22 @@ jobs:
|
|||||||
./transducer_stateless/pretrained.py \
|
./transducer_stateless/pretrained.py \
|
||||||
--method beam_search \
|
--method beam_search \
|
||||||
--beam-size 4 \
|
--beam-size 4 \
|
||||||
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/exp/pretrained.pt \
|
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||||
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/data/lang_bpe_500/bpe.model \
|
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1089-134686-0001.wav \
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0001.wav \
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||||
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10/test_wavs/1221-135766-0002.wav
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
- name: Run modified beam search decoding
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
./transducer_stateless/pretrained.py \
|
||||||
|
--method modified_beam_search \
|
||||||
|
--beam-size 4 \
|
||||||
|
--checkpoint ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/exp/pretrained.pt \
|
||||||
|
--bpe-model ./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/data/lang_bpe_500/bpe.model \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1089-134686-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0001.wav \
|
||||||
|
./tmp/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07/test_wavs/1221-135766-0002.wav
|
||||||
|
@ -80,16 +80,16 @@ We provide a Colab notebook to run a pre-trained RNN-T conformer model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Rc4Is-3Yp9LbcEz_Iy8hfyenyHsyjvqE?usp=sharing)
|
We provide a Colab notebook to run a pre-trained transducer conformer + stateless decoder model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CO1bXJ-2khDckZIW8zjOPHGSKLHpTDlp?usp=sharing)
|
||||||
|
|
||||||
### Aishell
|
### Aishell
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
#### Conformer encoder + embedding decoder
|
#### Conformer encoder + embedding decoder
|
||||||
|
|
||||||
Using commit `4c1b3665ee6efb935f4dd93a80ff0e154b13efb6`.
|
Using commit `TODO`.
|
||||||
|
|
||||||
Conformer encoder + non-recurrent decoder. The decoder
|
Conformer encoder + non-recurrent decoder. The decoder
|
||||||
contains only an embedding layer and a Conv1d (with kernel size 2).
|
contains only an embedding layer and a Conv1d (with kernel size 2).
|
||||||
@ -12,54 +12,65 @@ contains only an embedding layer and a Conv1d (with kernel size 2).
|
|||||||
The WERs are
|
The WERs are
|
||||||
|
|
||||||
| | test-clean | test-other | comment |
|
| | test-clean | test-other | comment |
|
||||||
|---------------------------|------------|------------|------------------------------------------|
|
|-------------------------------------|------------|------------|------------------------------------------|
|
||||||
| greedy search | 2.69 | 6.81 | --epoch 71, --avg 15, --max-duration 100 |
|
| greedy search (max sym per frame 1) | 2.68 | 6.71 | --epoch 61, --avg 18, --max-duration 100 |
|
||||||
| beam search (beam size 4) | 2.68 | 6.72 | --epoch 71, --avg 15, --max-duration 100 |
|
| greedy search (max sym per frame 2) | 2.69 | 6.71 | --epoch 61, --avg 18, --max-duration 100 |
|
||||||
|
| greedy search (max sym per frame 3) | 2.69 | 6.71 | --epoch 61, --avg 18, --max-duration 100 |
|
||||||
|
| modified beam search (beam size 4) | 2.67 | 6.64 | --epoch 61, --avg 18, --max-duration 100 |
|
||||||
|
|
||||||
|
|
||||||
The training command for reproducing is given below:
|
The training command for reproducing is given below:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
./prepare.sh
|
||||||
export CUDA_VISIBLE_DEVICES="0,1,2,3"
|
export CUDA_VISIBLE_DEVICES="0,1,2,3"
|
||||||
|
|
||||||
./transducer_stateless/train.py \
|
./transducer_stateless/train.py \
|
||||||
--world-size 4 \
|
--world-size 4 \
|
||||||
--num-epochs 76 \
|
--num-epochs 76 \
|
||||||
--start-epoch 0 \
|
--start-epoch 0 \
|
||||||
--exp-dir transducer_stateless/exp-full \
|
--exp-dir transducer_stateless/exp-full \
|
||||||
--full-libri 1 \
|
--full-libri 1 \
|
||||||
--max-duration 250 \
|
--max-duration 300 \
|
||||||
--lr-factor 3
|
--lr-factor 5 \
|
||||||
|
--bpe-model data/lang_bpe_500/bpe.model \
|
||||||
|
--modified-transducer-prob 0.25
|
||||||
```
|
```
|
||||||
|
|
||||||
The tensorboard training log can be found at
|
The tensorboard training log can be found at
|
||||||
<https://tensorboard.dev/experiment/qGdqzHnxS0WJ695OXfZDzA/#scalars&_smoothingWeight=0>
|
<https://tensorboard.dev/experiment/qgvWkbF2R46FYA6ZMNmOjA/#scalars>
|
||||||
|
|
||||||
The decoding command is:
|
The decoding command is:
|
||||||
```
|
```
|
||||||
epoch=71
|
epoch=61
|
||||||
avg=15
|
avg=18
|
||||||
|
|
||||||
## greedy search
|
## greedy search
|
||||||
./transducer_stateless/decode.py \
|
for sym in 1 2 3; do
|
||||||
--epoch $epoch \
|
|
||||||
--avg $avg \
|
|
||||||
--exp-dir transducer_stateless/exp-full \
|
|
||||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
|
||||||
--max-duration 100
|
|
||||||
|
|
||||||
## beam search
|
|
||||||
./transducer_stateless/decode.py \
|
./transducer_stateless/decode.py \
|
||||||
--epoch $epoch \
|
--epoch $epoch \
|
||||||
--avg $avg \
|
--avg $avg \
|
||||||
--exp-dir transducer_stateless/exp-full \
|
--exp-dir transducer_stateless/exp-full \
|
||||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||||
--max-duration 100 \
|
--max-duration 100 \
|
||||||
--decoding-method beam_search \
|
--max-sym-per-frame $sym
|
||||||
|
done
|
||||||
|
|
||||||
|
## modified beam search
|
||||||
|
|
||||||
|
./transducer_stateless/decode.py \
|
||||||
|
--epoch $epoch \
|
||||||
|
--avg $avg \
|
||||||
|
--exp-dir transducer_stateless/exp-full \
|
||||||
|
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||||
|
--max-duration 100 \
|
||||||
|
--context-size 2 \
|
||||||
|
--decoding-method modified_beam_search \
|
||||||
--beam-size 4
|
--beam-size 4
|
||||||
```
|
```
|
||||||
|
|
||||||
You can find a pretrained model by visiting
|
You can find a pretrained model by visiting
|
||||||
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-01-10>
|
<https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07>
|
||||||
|
|
||||||
|
|
||||||
#### Conformer encoder + LSTM decoder
|
#### Conformer encoder + LSTM decoder
|
||||||
|
@ -22,10 +22,11 @@ Usage:
|
|||||||
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
||||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||||
--method greedy_search \
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame 1 \
|
||||||
/path/to/foo.wav \
|
/path/to/foo.wav \
|
||||||
/path/to/bar.wav \
|
/path/to/bar.wav \
|
||||||
|
|
||||||
(1) beam search
|
(2) beam search
|
||||||
./transducer_stateless/pretrained.py \
|
./transducer_stateless/pretrained.py \
|
||||||
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
||||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||||
@ -34,6 +35,15 @@ Usage:
|
|||||||
/path/to/foo.wav \
|
/path/to/foo.wav \
|
||||||
/path/to/bar.wav \
|
/path/to/bar.wav \
|
||||||
|
|
||||||
|
(3) modified beam search
|
||||||
|
./transducer_stateless/pretrained.py \
|
||||||
|
--checkpoint ./transducer_stateless/exp/pretrained.pt \
|
||||||
|
--bpe-model ./data/lang_bpe_500/bpe.model \
|
||||||
|
--method modified_beam_search \
|
||||||
|
--beam-size 4 \
|
||||||
|
/path/to/foo.wav \
|
||||||
|
/path/to/bar.wav \
|
||||||
|
|
||||||
You can also use `./transducer_stateless/exp/epoch-xx.pt`.
|
You can also use `./transducer_stateless/exp/epoch-xx.pt`.
|
||||||
|
|
||||||
Note: ./transducer_stateless/exp/pretrained.pt is generated by
|
Note: ./transducer_stateless/exp/pretrained.pt is generated by
|
||||||
@ -50,7 +60,7 @@ import kaldifeat
|
|||||||
import sentencepiece as spm
|
import sentencepiece as spm
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
from beam_search import beam_search, greedy_search
|
from beam_search import beam_search, greedy_search, modified_beam_search
|
||||||
from conformer import Conformer
|
from conformer import Conformer
|
||||||
from decoder import Decoder
|
from decoder import Decoder
|
||||||
from joiner import Joiner
|
from joiner import Joiner
|
||||||
@ -90,6 +100,7 @@ def get_parser():
|
|||||||
help="""Possible values are:
|
help="""Possible values are:
|
||||||
- greedy_search
|
- greedy_search
|
||||||
- beam_search
|
- beam_search
|
||||||
|
- modified_beam_search
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -107,7 +118,7 @@ def get_parser():
|
|||||||
"--beam-size",
|
"--beam-size",
|
||||||
type=int,
|
type=int,
|
||||||
default=4,
|
default=4,
|
||||||
help="Used only when --method is beam_search",
|
help="Used only when --method is beam_search and modified_beam_search ",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@ -300,6 +311,10 @@ def main():
|
|||||||
hyp = beam_search(
|
hyp = beam_search(
|
||||||
model=model, encoder_out=encoder_out_i, beam=params.beam_size
|
model=model, encoder_out=encoder_out_i, beam=params.beam_size
|
||||||
)
|
)
|
||||||
|
elif params.decoding_method == "modified_beam_search":
|
||||||
|
hyp = modified_beam_search(
|
||||||
|
model=model, encoder_out=encoder_out_i, beam=params.beam_size
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported method: {params.method}")
|
raise ValueError(f"Unsupported method: {params.method}")
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user