Add HLG decoding with OpenFst on CPU for aishell conformer_ctc (#1279)

Fangjun Kuang 2023-10-01 13:46:16 +08:00 committed by GitHub
parent 48cc41bd83
commit f14b673408
16 changed files with 146 additions and 25 deletions


@@ -8,7 +8,7 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
-cd egs/librispeech/ASR
pushd egs/librispeech/ASR
# repo_url=https://github.com/csukuangfj/icefall-asr-conformer-ctc-bpe-500
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
@@ -112,3 +112,81 @@ log "Decoding with HLG on CPU with OpenFst"
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
rm -rf $repo
popd
log "Test aishell"
pushd egs/aishell/ASR
repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "exp/pretrained.pt"
git lfs pull --include "data/lm/G_3_gram_char.fst.txt"
popd
log "Display test files"
tree $repo/
ls -lh $repo/test_wavs/*.wav
log "CTC decoding"
log "Exporting model with torchscript"
pushd $repo/exp
ln -s pretrained.pt epoch-99.pt
popd
./conformer_ctc/export.py \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--tokens $repo/data/lang_char/tokens.txt \
--jit 1
ls -lh $repo/exp
log "Generating H.fst, HL.fst"
./local/prepare_lang_fst.py --lang-dir $repo/data/lang_char --ngram-G $repo/data/lm/G_3_gram_char.fst.txt
ls -lh $repo/data/lang_char
log "Decoding with H on CPU with OpenFst"
./conformer_ctc/jit_pretrained_decode_with_H.py \
--nn-model $repo/exp/cpu_jit.pt \
--H $repo/data/lang_char/H.fst \
--tokens $repo/data/lang_char/tokens.txt \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/2.wav
log "Decoding with HL on CPU with OpenFst"
./conformer_ctc/jit_pretrained_decode_with_HL.py \
--nn-model $repo/exp/cpu_jit.pt \
--HL $repo/data/lang_char/HL.fst \
--words $repo/data/lang_char/words.txt \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/2.wav
log "Decoding with HLG on CPU with OpenFst"
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
--nn-model $repo/exp/cpu_jit.pt \
--HLG $repo/data/lang_char/HLG.fst \
--words $repo/data/lang_char/words.txt \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/2.wav
rm -rf $repo
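
A note on the "Exporting model with torchscript" step above: the `--jit 1` path of `export.py` comes down to `torch.jit.script` followed by `save`, which produces the `exp/cpu_jit.pt` file that the three decode scripts load. Below is a minimal, self-contained sketch of that mechanism; the toy module is a hypothetical stand-in for the actual Conformer.

```python
import torch


class ToyCtcModel(torch.nn.Module):
    """Hypothetical stand-in for the Conformer exported by export.py."""

    def __init__(self, feat_dim: int = 80, num_classes: int = 100):
        # In the real recipe, num_classes is derived from tokens.txt (+1 for the blank).
        super().__init__()
        self.linear = torch.nn.Linear(feat_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (N, T, feat_dim) -> (N, T, num_classes) log-probs for CTC decoding
        return self.linear(x).log_softmax(dim=-1)


model = ToyCtcModel()
model.eval()
scripted = torch.jit.script(model)  # what --jit 1 applies to the real model
scripted.save("cpu_jit.pt")         # later loaded with torch.jit.load("cpu_jit.pt")
```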


@@ -60,7 +60,7 @@ jobs:
- name: Install Python dependencies
  run: |
-   grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
    grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
    pip uninstall -y protobuf
    pip install --no-binary protobuf protobuf==3.20.*

egs/aishell/ASR/conformer_ctc/export.py Normal file → Executable file

@@ -23,12 +23,12 @@ import argparse
import logging
from pathlib import Path

import k2
import torch
from conformer import Conformer

from icefall.checkpoint import average_checkpoints, load_checkpoint
-from icefall.lexicon import Lexicon
-from icefall.utils import AttributeDict, str2bool
from icefall.utils import AttributeDict, num_tokens, str2bool


def get_parser():
@@ -63,11 +63,10 @@ def get_parser():
    )

    parser.add_argument(
-       "--lang-dir",
        "--tokens",
        type=str,
-       default="data/lang_char",
        required=True,
-       help="""It contains language related input files such as "lexicon.txt"
-       """,
        help="Path to the tokens.txt.",
    )

    parser.add_argument(
@@ -98,16 +97,16 @@ def get_params() -> AttributeDict:
def main():
    args = get_parser().parse_args()
    args.exp_dir = Path(args.exp_dir)
-   args.lang_dir = Path(args.lang_dir)

    params = get_params()
    params.update(vars(args))

-   logging.info(params)
-   lexicon = Lexicon(params.lang_dir)
-   max_token_id = max(lexicon.tokens)
-   num_classes = max_token_id + 1  # +1 for the blank
    # Load tokens.txt here
    token_table = k2.SymbolTable.from_file(params.tokens)

    num_classes = num_tokens(token_table) + 1  # +1 for the blank

    logging.info(params)

    device = torch.device("cpu")
    if torch.cuda.is_available():
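
For context on the export.py change above: the output dimension of the exported model is now derived from `tokens.txt` instead of from a `Lexicon`. A minimal sketch of that pattern, assuming an installed icefall and a placeholder path (`num_tokens` is the helper imported from `icefall.utils` in the diff):

```python
import k2
from icefall.utils import num_tokens

# tokens.txt maps each token (a Chinese character for aishell, a BPE piece
# for librispeech) to an integer id.
token_table = k2.SymbolTable.from_file("data/lang_char/tokens.txt")

# The CTC output layer needs one extra class for the blank symbol,
# hence the "+ 1" used by export.py above.
num_classes = num_tokens(token_table) + 1
print("num_classes:", num_classes)
```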


@@ -0,0 +1 @@
../../../librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py


@@ -0,0 +1 @@
../../../librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py


@@ -0,0 +1 @@
../../../librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py

egs/aishell/ASR/conformer_ctc/test_transformer.py Normal file → Executable file (mode change only)


@@ -0,0 +1 @@
../../../librispeech/ASR/local/prepare_lang_fst.py


@@ -143,6 +143,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
./local/prepare_lang.py --lang-dir $lang_phone_dir
fi
# Train a bigram P for MMI training
if [ ! -f $lang_phone_dir/transcript_words.txt ]; then
log "Generate data to train phone based bigram P"
@@ -203,6 +204,10 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
if [ ! -f $lang_char_dir/L_disambig.pt ]; then
./local/prepare_char.py --lang-dir $lang_char_dir
fi
if [ ! -f $lang_char_dir/HLG.fst ]; then
./local/prepare_lang_fst.py --lang-dir $lang_phone_dir --ngram-G ./data/lm/G_3_gram.fst.txt
fi
fi
if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then


@@ -7,6 +7,8 @@ on CPU using OpenFST and decoders from kaldi.
Usage:

(1) LibriSpeech conformer_ctc

./conformer_ctc/jit_pretrained_decode_with_H.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--H ./data/lang_bpe_500/H.fst \
@@ -14,6 +16,17 @@ Usage:
./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac

(2) AIShell conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_H.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--H ./data/lang_char/H.fst \
--tokens ./data/lang_char/tokens.txt \
./BAC009S0764W0121.wav \
./BAC009S0764W0122.wav \
./BAC009S0764W0123.wav

Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
you can use ./export.py --jit 1
"""
@@ -23,12 +36,11 @@ import logging
import math
from typing import Dict, List

-import kaldi_hmm_gmm
import kaldifeat
import kaldifst
import torch
import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


@@ -7,6 +7,8 @@ on CPU using OpenFST and decoders from kaldi.
Usage:

(1) LibriSpeech conformer_ctc

./conformer_ctc/jit_pretrained_decode_with_HL.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HL ./data/lang_bpe_500/HL.fst \
@@ -14,6 +16,17 @@ Usage:
./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac

(2) AIShell conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_HL.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HL ./data/lang_char/HL.fst \
--words ./data/lang_char/words.txt \
./BAC009S0764W0121.wav \
./BAC009S0764W0122.wav \
./BAC009S0764W0123.wav

Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
you can use ./export.py --jit 1
"""
@@ -23,12 +36,11 @@ import logging
import math
from typing import Dict, List

-import kaldi_hmm_gmm
import kaldifeat
import kaldifst
import torch
import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


@@ -7,6 +7,8 @@ on CPU using OpenFST and decoders from kaldi.
Usage:

(1) LibriSpeech conformer_ctc

./conformer_ctc/jit_pretrained_decode_with_HLG.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HLG ./data/lang_bpe_500/HLG.fst \
@@ -14,6 +16,16 @@ Usage:
./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac

(2) AIShell conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HLG ./data/lang_char/HLG.fst \
--words ./data/lang_char/words.txt \
./BAC009S0764W0121.wav \
./BAC009S0764W0122.wav \
./BAC009S0764W0123.wav

Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
you can use ./export.py --jit 1
"""
@@ -23,12 +35,11 @@ import logging
import math
from typing import Dict, List

-import kaldi_hmm_gmm
import kaldifeat
import kaldifst
import torch
import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


@@ -28,7 +28,7 @@ import kaldifeat
import kaldifst
import torch
import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


@@ -28,7 +28,7 @@ import kaldifeat
import kaldifst
import torch
import torchaudio
-from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


@@ -1,17 +1,17 @@
# Introduction

This folder uses [kaldifst][kaldifst] for graph construction
-and decoders from [kaldi-hmm-gmm][kaldi-hmm-gmm] for CTC decoding.
and decoders from [kaldi-decoder][kaldi-decoder] for CTC decoding.

It supports only `CPU`.

You can use

```bash
-pip install kaldifst kaldi-hmm-gmm
pip install kaldifst kaldi-decoder
```

to install the dependencies.
-[kaldi-hmm-gmm]: https://github.com/csukuangfj/kaldi-hmm-gmm
[kaldi-decoder]: https://github.com/k2-fsa/kaldi-decoder
[kaldifst]: https://github.com/k2-fsa/kaldifst
[k2]: https://github.com/k2-fsa/k2
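
Since the README only names the libraries, here is a rough sketch of how the decode scripts in this commit put them together: `kaldifst` loads the compiled H/HL/HLG graph, and `FasterDecoder` from `kaldi-decoder` walks it over the network's CTC log-probs. The wiring below is an approximation of the `jit_pretrained_decode_with_*.py` scripts, not a copy of them; the graph path is a placeholder and the log-probs are random stand-ins for real model output.

```python
import kaldifst
import torch
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions

# Decoding graph produced by ./local/prepare_lang_fst.py
HLG = kaldifst.StdVectorFst.read("data/lang_char/HLG.fst")

# (T, num_classes) CTC log-probs; in the real scripts this comes from the
# torchscript model loaded via torch.jit.load("cpu_jit.pt").
nnet_output = torch.randn(100, 4000).log_softmax(dim=-1)

# DecodableCtc wraps the log-prob matrix so the FST decoder can score frames.
decodable = DecodableCtc(nnet_output.cpu())
decoder = FasterDecoder(HLG, FasterDecoderOptions(max_active=3000))
decoder.decode(decodable)

if decoder.reached_final():
    ok, best_path = decoder.get_best_path()
    # For HLG the output labels of the best path are word ids;
    # map them back to words via data/lang_char/words.txt.
    ok, isymbols, osymbols, weight = kaldifst.get_linear_symbol_sequence(best_path)
    print(osymbols)
```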


@@ -1,7 +1,7 @@
kaldifst
kaldilm
kaldialign
-kaldi-hmm-gmm
kaldi-decoder
sentencepiece>=0.1.96
tensorboard
typeguard