Add HLG decoding with OpenFst on CPU for aishell conformer_ctc (#1279)
This commit is contained in:
parent 48cc41bd83
commit f14b673408
.github/scripts/run-pre-trained-conformer-ctc.sh (vendored, 80 changed lines)
@@ -8,7 +8,7 @@ log() {
   echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 
-cd egs/librispeech/ASR
+pushd egs/librispeech/ASR
 
 # repo_url=https://github.com/csukuangfj/icefall-asr-conformer-ctc-bpe-500
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
@@ -112,3 +112,81 @@ log "Decoding with HLG on CPU with OpenFst"
   $repo/test_wavs/1089-134686-0001.wav \
   $repo/test_wavs/1221-135766-0001.wav \
   $repo/test_wavs/1221-135766-0002.wav
+
+rm -rf $repo
+
+popd
+
+log "Test aishell"
+
+pushd egs/aishell/ASR
+
+repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
+log "Downloading pre-trained model from $repo_url"
+GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+repo=$(basename $repo_url)
+pushd $repo
+
+git lfs pull --include "exp/pretrained.pt"
+git lfs pull --include "data/lm/G_3_gram_char.fst.txt"
+
+popd
+
+log "Display test files"
+tree $repo/
+ls -lh $repo/test_wavs/*.wav
+
+log "CTC decoding"
+
+log "Exporting model with torchscript"
+
+pushd $repo/exp
+ln -s pretrained.pt epoch-99.pt
+popd
+
+./conformer_ctc/export.py \
+  --epoch 99 \
+  --avg 1 \
+  --exp-dir $repo/exp \
+  --tokens $repo/data/lang_char/tokens.txt \
+  --jit 1
+
+ls -lh $repo/exp
+
+log "Generating H.fst, HL.fst"
+
+./local/prepare_lang_fst.py --lang-dir $repo/data/lang_char --ngram-G $repo/data/lm/G_3_gram_char.fst.txt
+
+ls -lh $repo/data/lang_char
+
+log "Decoding with H on CPU with OpenFst"
+
+./conformer_ctc/jit_pretrained_decode_with_H.py \
+  --nn-model $repo/exp/cpu_jit.pt \
+  --H $repo/data/lang_char/H.fst \
+  --tokens $repo/data/lang_char/tokens.txt \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/2.wav
+
+log "Decoding with HL on CPU with OpenFst"
+
+./conformer_ctc/jit_pretrained_decode_with_HL.py \
+  --nn-model $repo/exp/cpu_jit.pt \
+  --HL $repo/data/lang_char/HL.fst \
+  --words $repo/data/lang_char/words.txt \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/2.wav
+
+log "Decoding with HLG on CPU with OpenFst"
+
+./conformer_ctc/jit_pretrained_decode_with_HLG.py \
+  --nn-model $repo/exp/cpu_jit.pt \
+  --HLG $repo/data/lang_char/HLG.fst \
+  --words $repo/data/lang_char/words.txt \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/2.wav
+
+rm -rf $repo
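The decode scripts exercised above all share the same front end: load the TorchScript model written by export.py, compute fbank features with kaldifeat, and feed them to the model to get per-frame CTC log-probabilities before searching H/HL/HLG. The following is a minimal sketch of that front end; the file paths and the 80-bin fbank settings are assumptions mirroring typical icefall configs, not values stated in this diff.

```python
# Sketch: load the exported TorchScript model and compute fbank features the
# way the jit_pretrained_decode_with_*.py scripts do. Paths and the 80-bin
# fbank configuration are illustrative assumptions.
import kaldifeat
import torch
import torchaudio

model = torch.jit.load(
    "icefall_asr_aishell_conformer_ctc/exp/cpu_jit.pt", map_location="cpu"
)
model.eval()

opts = kaldifeat.FbankOptions()
opts.device = torch.device("cpu")
opts.frame_opts.dither = 0
opts.frame_opts.samp_freq = 16000
opts.mel_opts.num_bins = 80
fbank = kaldifeat.Fbank(opts)

wave, sample_rate = torchaudio.load(
    "icefall_asr_aishell_conformer_ctc/test_wavs/0.wav"
)
assert sample_rate == 16000
features = fbank([wave[0]])  # list of 1-D waveforms -> list of (T, 80) tensors
# The decode scripts then run the model on the padded features to obtain
# CTC log-probs, which are searched with the H/HL/HLG graphs below.
```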
.github/workflows/run-yesno-recipe.yml (vendored, 2 changed lines)
@@ -60,7 +60,7 @@ jobs:
 
       - name: Install Python dependencies
         run: |
-          grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
+          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
           pip uninstall -y protobuf
           pip install --no-binary protobuf protobuf==3.20.*
 
egs/aishell/ASR/conformer_ctc/export.py (normal file → executable file, 21 changed lines)
@@ -23,12 +23,12 @@ import argparse
 import logging
 from pathlib import Path
 
+import k2
 import torch
 from conformer import Conformer
 
 from icefall.checkpoint import average_checkpoints, load_checkpoint
-from icefall.lexicon import Lexicon
-from icefall.utils import AttributeDict, str2bool
+from icefall.utils import AttributeDict, num_tokens, str2bool
 
 
 def get_parser():
@@ -63,11 +63,10 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--lang-dir",
+        "--tokens",
         type=str,
-        default="data/lang_char",
-        help="""It contains language related input files such as "lexicon.txt"
-        """,
+        required=True,
+        help="Path to the tokens.txt.",
     )
 
     parser.add_argument(
@@ -98,16 +97,16 @@ def get_params() -> AttributeDict:
 def main():
     args = get_parser().parse_args()
     args.exp_dir = Path(args.exp_dir)
-    args.lang_dir = Path(args.lang_dir)
 
     params = get_params()
     params.update(vars(args))
 
-    logging.info(params)
+    # Load tokens.txt here
+    token_table = k2.SymbolTable.from_file(params.tokens)
 
-    lexicon = Lexicon(params.lang_dir)
-    max_token_id = max(lexicon.tokens)
-    num_classes = max_token_id + 1  # +1 for the blank
+    num_classes = num_tokens(token_table) + 1  # +1 for the blank
+
+    logging.info(params)
 
     device = torch.device("cpu")
     if torch.cuda.is_available():
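The net effect of the export.py change is that the model's output dimension is now derived from tokens.txt alone, with no Lexicon construction. A short sketch of that computation, using the same k2 and icefall helpers the script now imports (the tokens.txt path below is just an example):

```python
# Sketch of the tokens-based size computation introduced in export.py.
# The path is an example; pass whatever --tokens points at.
import k2
from icefall.utils import num_tokens

token_table = k2.SymbolTable.from_file("data/lang_char/tokens.txt")
# num_tokens() counts token ids in the table (disambiguation symbols such as
# "#0" are not counted), so the network output size is that count plus one
# extra slot for the CTC blank.
num_classes = num_tokens(token_table) + 1
print(f"num_classes = {num_classes}")
```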
egs/aishell/ASR/conformer_ctc/jit_pretrained_decode_with_H.py (new symbolic link, 1 line)
@@ -0,0 +1 @@
+../../../librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py
egs/aishell/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py (new symbolic link, 1 line)
@@ -0,0 +1 @@
+../../../librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py
egs/aishell/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py (new symbolic link, 1 line)
@@ -0,0 +1 @@
+../../../librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py
egs/aishell/ASR/conformer_ctc/test_transformer.py (normal file → executable file, 0 changed lines)
egs/aishell/ASR/local/prepare_lang_fst.py (new symbolic link, 1 line)
@@ -0,0 +1 @@
+../../../librispeech/ASR/local/prepare_lang_fst.py
egs/aishell/ASR/prepare.sh
@@ -143,6 +143,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
   ./local/prepare_lang.py --lang-dir $lang_phone_dir
 fi
 
+
 # Train a bigram P for MMI training
 if [ ! -f $lang_phone_dir/transcript_words.txt ]; then
   log "Generate data to train phone based bigram P"
@@ -203,6 +204,10 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
   if [ ! -f $lang_char_dir/L_disambig.pt ]; then
     ./local/prepare_char.py --lang-dir $lang_char_dir
   fi
+
+  if [ ! -f $lang_char_dir/HLG.fst ]; then
+    ./local/prepare_lang_fst.py --lang-dir $lang_phone_dir --ngram-G ./data/lm/G_3_gram.fst.txt
+  fi
 fi
 
 if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
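prepare_lang_fst.py writes H.fst and HL.fst into the lang dir, and also an HLG.fst when --ngram-G is supplied. A quick sanity check that the generated graphs load before decoding — a sketch, assuming kaldifst's StdVectorFst.read as used by the decode scripts, with an example lang dir:

```python
# Sketch: verify the FSTs produced by ./local/prepare_lang_fst.py can be
# loaded by kaldifst. "data/lang_char" is an example directory.
import kaldifst

lang_dir = "data/lang_char"
for name in ("H.fst", "HL.fst", "HLG.fst"):
    graph = kaldifst.StdVectorFst.read(f"{lang_dir}/{name}")
    print(f"loaded {lang_dir}/{name}: {type(graph).__name__}")
```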
egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py
@@ -7,6 +7,8 @@ on CPU using OpenFST and decoders from kaldi.
 
 Usage:
 
+(1) LibriSpeech conformer_ctc
+
 ./conformer_ctc/jit_pretrained_decode_with_H.py \
   --nn-model ./conformer_ctc/exp/cpu_jit.pt \
   --H ./data/lang_bpe_500/H.fst \
@@ -14,6 +16,17 @@ Usage:
   ./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
   ./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac
 
+
+(2) AIShell conformer_ctc
+
+./conformer_ctc/jit_pretrained_decode_with_H.py \
+  --nn-model ./conformer_ctc/exp/cpu_jit.pt \
+  --H ./data/lang_char/H.fst \
+  --tokens ./data/lang_char/tokens.txt \
+  ./BAC009S0764W0121.wav \
+  ./BAC009S0764W0122.wav \
+  ./BAC009S0764W0123.wav
+
 Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
 you can use ./export.py --jit 1
 """
@@ -23,12 +36,11 @@ import logging
 import math
 from typing import Dict, List
 
-import kaldi_hmm_gmm
 import kaldifeat
 import kaldifst
 import torch
 import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
+from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
 from torch.nn.utils.rnn import pad_sequence
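All three decode scripts share the same search step once the CTC log-probs are available: wrap them in a DecodableCtc, run FasterDecoder over the chosen graph, and read the output labels off the best path. Below is a condensed sketch, assuming the kaldi_decoder and kaldifst calls match the ones these scripts import; the max_active value is an illustrative choice.

```python
# Sketch of the shared decoding step, assuming the kaldi_decoder and kaldifst
# APIs used by the jit_pretrained_decode_with_*.py scripts.
import kaldifst
import torch
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions


def decode_one(nnet_output: torch.Tensor, graph: kaldifst.StdVectorFst):
    """nnet_output: (num_frames, num_classes) CTC log-probs for one wave."""
    decodable = DecodableCtc(nnet_output.cpu())
    decoder = FasterDecoder(graph, FasterDecoderOptions(max_active=3000))
    decoder.decode(decodable)
    if not decoder.reached_final():
        return []  # decoding failed to reach a final state
    ok, best_path = decoder.get_best_path()
    # Output labels are token ids when the graph is H, word ids for HL/HLG.
    ok, isymbols, osymbols, weight = kaldifst.get_linear_symbol_sequence(best_path)
    return osymbols
```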
egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py
@@ -7,6 +7,8 @@ on CPU using OpenFST and decoders from kaldi.
 
 Usage:
 
+(1) LibriSpeech conformer_ctc
+
 ./conformer_ctc/jit_pretrained_decode_with_HL.py \
   --nn-model ./conformer_ctc/exp/cpu_jit.pt \
   --HL ./data/lang_bpe_500/HL.fst \
@@ -14,6 +16,17 @@ Usage:
   ./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
   ./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac
 
+(2) AIShell conformer_ctc
+
+./conformer_ctc/jit_pretrained_decode_with_HL.py \
+  --nn-model ./conformer_ctc/exp/cpu_jit.pt \
+  --HL ./data/lang_char/HL.fst \
+  --words ./data/lang_char/words.txt \
+  ./BAC009S0764W0121.wav \
+  ./BAC009S0764W0122.wav \
+  ./BAC009S0764W0123.wav
+
+
 Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
 you can use ./export.py --jit 1
 """
@@ -23,12 +36,11 @@ import logging
 import math
 from typing import Dict, List
 
-import kaldi_hmm_gmm
 import kaldifeat
 import kaldifst
 import torch
 import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
+from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
 from torch.nn.utils.rnn import pad_sequence
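For the HL and HLG graphs the decoder emits word ids, which the scripts map back to text with words.txt. A small sketch of that mapping follows; the helper name, paths, and example ids are illustrative, not part of the diff. For the char-based AIShell lexicon the words are joined without spaces, whereas LibriSpeech hypotheses are joined with spaces.

```python
# Sketch: map word ids from the HL/HLG best path back to text using
# words.txt ("<word> <id>" per line). Helper and paths are illustrative.
from typing import Dict, List


def load_id2word(words_txt: str) -> Dict[int, str]:
    id2word = {}
    with open(words_txt, encoding="utf-8") as f:
        for line in f:
            word, idx = line.split()
            id2word[int(idx)] = word
    return id2word


id2word = load_id2word("data/lang_char/words.txt")
word_ids: List[int] = [23, 512, 87]  # e.g. the osymbols returned by the decoder
hyp = "".join(id2word[i] for i in word_ids)  # AIShell: characters, no spaces
print(hyp)
```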
egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py
@@ -7,6 +7,8 @@ on CPU using OpenFST and decoders from kaldi.
 
 Usage:
 
+(1) LibriSpeech conformer_ctc
+
 ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
   --nn-model ./conformer_ctc/exp/cpu_jit.pt \
   --HLG ./data/lang_bpe_500/HLG.fst \
@@ -14,6 +16,16 @@ Usage:
   ./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
   ./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac
 
+(2) AIShell conformer_ctc
+
+./conformer_ctc/jit_pretrained_decode_with_HLG.py \
+  --nn-model ./conformer_ctc/exp/cpu_jit.pt \
+  --HLG ./data/lang_char/HLG.fst \
+  --words ./data/lang_char/words.txt \
+  ./BAC009S0764W0121.wav \
+  ./BAC009S0764W0122.wav \
+  ./BAC009S0764W0123.wav
+
 Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
 you can use ./export.py --jit 1
 """
@@ -23,12 +35,11 @@ import logging
 import math
 from typing import Dict, List
 
-import kaldi_hmm_gmm
 import kaldifeat
 import kaldifst
 import torch
 import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
+from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
 from torch.nn.utils.rnn import pad_sequence
@@ -28,7 +28,7 @@ import kaldifeat
 import kaldifst
 import torch
 import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
+from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
 from torch.nn.utils.rnn import pad_sequence
 
 
@@ -28,7 +28,7 @@ import kaldifeat
 import kaldifst
 import torch
 import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
+from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
 from torch.nn.utils.rnn import pad_sequence
 
 
@@ -1,17 +1,17 @@
 # Introduction
 
 This folder uses [kaldifst][kaldifst] for graph construction
-and decoders from [kaldi-hmm-gmm][kaldi-hmm-gmm] for CTC decoding.
+and decoders from [kaldi-decoder][kaldi-decoder] for CTC decoding.
 
 It supports only `CPU`.
 
 You can use
 
 ```bash
-pip install kaldifst kaldi-hmm-gmm
+pip install kaldifst kaldi-decoder
 ```
 to install the dependencies.
 
-[kaldi-hmm-gmm]: https://github.com/csukuangfj/kaldi-hmm-gmm
+[kaldi-decoder]: https://github.com/k2-fsa/kaldi-decoder
 [kaldifst]: https://github.com/k2-fsa/kaldifst
 [k2]: https://github.com/k2-fsa/k2
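To confirm the renamed dependency is installed correctly, a quick import check can be run — a sketch; the class names are simply the ones the decode scripts import:

```python
# Quick check that the dependencies named in the README are importable.
import kaldifst  # graph construction
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions  # CTC decoding

print("kaldifst and kaldi_decoder are ready")
```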
requirements-ci.txt
@@ -1,7 +1,7 @@
 kaldifst
 kaldilm
 kaldialign
-kaldi-hmm-gmm
+kaldi-decoder
 sentencepiece>=0.1.96
 tensorboard
 typeguard