Merge branch 'master' into wenetspeech
5
.flake8
@ -1,7 +1,7 @@
|
|||||||
[flake8]
|
[flake8]
|
||||||
show-source=true
|
show-source=true
|
||||||
statistics=true
|
statistics=true
|
||||||
max-line-length = 80
|
max-line-length = 88
|
||||||
per-file-ignores =
|
per-file-ignores =
|
||||||
# line too long
|
# line too long
|
||||||
icefall/diagnostics.py: E501,
|
icefall/diagnostics.py: E501,
|
||||||
@ -11,7 +11,8 @@ per-file-ignores =
|
|||||||
egs/*/ASR/*/scaling.py: E501,
|
egs/*/ASR/*/scaling.py: E501,
|
||||||
egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
|
egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
|
||||||
egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
|
egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
|
||||||
egs/librispeech/ASR/conformer_ctc2/*py: E501,
|
egs/librispeech/ASR/conformer_ctc*/*py: E501,
|
||||||
|
egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
|
||||||
egs/librispeech/ASR/RESULTS.md: E999,
|
egs/librispeech/ASR/RESULTS.md: E999,
|
||||||
|
|
||||||
# invalid escape sequence (cause by tex formular), W605
|
# invalid escape sequence (cause by tex formular), W605
|
||||||
|
|||||||
3
.git-blame-ignore-revs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Migrate to 88 characters per line (see: https://github.com/lhotse-speech/lhotse/issues/890)
|
||||||
|
107df3b115a58f1b68a6458c3f94a130004be34c
|
||||||
|
d31db010371a4128856480382876acdc0d1739ed
|
||||||
@ -15,5 +15,5 @@ mkdir -p data
|
|||||||
cd data
|
cd data
|
||||||
[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
|
[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
|
||||||
cd ..
|
cd ..
|
||||||
./local/compute_fbank_librispeech.py
|
./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
|
||||||
ls -lh data/fbank/
|
ls -lh data/fbank/
|
||||||
|
|||||||
@ -25,7 +25,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
|
|||||||
122
.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh
vendored
Executable file
@ -0,0 +1,122 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo/exp
|
||||||
|
git lfs pull --include "data/lang_bpe_500/HLG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/L.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/LG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "data/lm/G_4_gram.pt"
|
||||||
|
git lfs pull --include "exp/jit_trace.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
ls -lh *.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.trace()"
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./conformer_ctc3/jit_pretrained.py \
|
||||||
|
--model-filename $repo/exp/jit_trace.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--G $repo/data/lm/G_4_gram.pt \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
log "Export to torchscript model"
|
||||||
|
|
||||||
|
./conformer_ctc3/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--lang-dir $repo/data/lang_bpe_500 \
|
||||||
|
--jit-trace 1 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0
|
||||||
|
|
||||||
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.trace()"
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./conformer_ctc3/jit_pretrained.py \
|
||||||
|
--model-filename $repo/exp/jit_trace.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--G $repo/data/lm/G_4_gram.pt \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./conformer_ctc3/pretrained.py \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--G $repo/data/lm/G_4_gram.pt \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
|
mkdir -p conformer_ctc3/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt conformer_ctc3/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh conformer_ctc3/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
|
||||||
|
for method in ctc-decoding 1best; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
./conformer_ctc3/decode.py \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--exp-dir conformer_ctc3/exp/ \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--decoding-method $method \
|
||||||
|
--lm-dir data/lm
|
||||||
|
done
|
||||||
|
|
||||||
|
rm conformer_ctc3/exp/*.pt
|
||||||
|
fi
|
||||||
191
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
vendored
Executable file
@ -0,0 +1,191 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
git lfs install
|
||||||
|
git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
abs_repo=$(realpath $repo)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo/exp
|
||||||
|
ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
|
||||||
|
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Test exporting with torch.jit.trace()"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--jit-trace 1
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.trace()"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--encoder-model-filename $repo/exp/encoder_jit_trace.pt \
|
||||||
|
--decoder-model-filename $repo/exp/decoder_jit_trace.pt \
|
||||||
|
--joiner-model-filename $repo/exp/joiner_jit_trace.pt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
for sym in 1 2 3; do
|
||||||
|
log "Greedy search with --max-sym-per-frame $sym"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame $sym \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for method in modified_beam_search beam_search fast_beam_search; do
|
||||||
|
log "$method"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/pretrained.py \
|
||||||
|
--method $method \
|
||||||
|
--beam-size 4 \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
|
||||||
|
if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"shallow-fusion" ]]; then
|
||||||
|
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
||||||
|
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
||||||
|
lm_repo=$(basename $lm_repo_url)
|
||||||
|
pushd $lm_repo
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
mv exp/pretrained.pt exp/epoch-88.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
mkdir -p lstm_transducer_stateless2/exp
|
||||||
|
ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh lstm_transducer_stateless2/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other with RNN LM"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/decode.py \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir lstm_transducer_stateless2/exp \
|
||||||
|
--max-duration 600 \
|
||||||
|
--decoding-method modified_beam_search_lm_shallow_fusion \
|
||||||
|
--beam 4 \
|
||||||
|
--use-shallow-fusion 1 \
|
||||||
|
--lm-type rnn \
|
||||||
|
--lm-exp-dir $lm_repo/exp \
|
||||||
|
--lm-epoch 88 \
|
||||||
|
--lm-avg 1 \
|
||||||
|
--lm-scale 0.3 \
|
||||||
|
--rnn-lm-num-layers 3 \
|
||||||
|
--rnn-lm-tie-weights 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
|
||||||
|
bigram_repo_url=https://huggingface.co/marcoyang/librispeech_bigram
|
||||||
|
log "Download bi-gram LM from ${bigram_repo_url}"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
|
||||||
|
bigramlm_repo=$(basename $bigram_repo_url)
|
||||||
|
pushd $bigramlm_repo
|
||||||
|
git lfs pull --include "2gram.fst.txt"
|
||||||
|
cp 2gram.fst.txt $abs_repo/data/lang_bpe_500/.
|
||||||
|
popd
|
||||||
|
|
||||||
|
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
||||||
|
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
||||||
|
lm_repo=$(basename $lm_repo_url)
|
||||||
|
pushd $lm_repo
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
mv exp/pretrained.pt exp/epoch-88.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
mkdir -p lstm_transducer_stateless2/exp
|
||||||
|
ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh lstm_transducer_stateless2/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/decode.py \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir lstm_transducer_stateless2/exp \
|
||||||
|
--max-duration 600 \
|
||||||
|
--decoding-method modified_beam_search_LODR \
|
||||||
|
--beam 4 \
|
||||||
|
--use-shallow-fusion 1 \
|
||||||
|
--lm-type rnn \
|
||||||
|
--lm-exp-dir $lm_repo/exp \
|
||||||
|
--lm-scale 0.4 \
|
||||||
|
--lm-epoch 88 \
|
||||||
|
--rnn-lm-avg 1 \
|
||||||
|
--rnn-lm-num-layers 3 \
|
||||||
|
--rnn-lm-tie-weights 1 \
|
||||||
|
--tokens-ngram 2 \
|
||||||
|
--ngram-lm-scale -0.16
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
|
||||||
|
mkdir -p lstm_transducer_stateless2/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh lstm_transducer_stateless2/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
|
||||||
|
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--exp-dir lstm_transducer_stateless2/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
rm lstm_transducer_stateless2/exp/*.pt
|
||||||
|
fi
|
||||||
@ -1,233 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
#
|
|
||||||
set -e
|
|
||||||
|
|
||||||
log() {
|
|
||||||
# This function is from espnet
|
|
||||||
local fname=${BASH_SOURCE[1]##*/}
|
|
||||||
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
|
||||||
}
|
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
|
|
||||||
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
|
||||||
|
|
||||||
log "Downloading pre-trained model from $repo_url"
|
|
||||||
git lfs install
|
|
||||||
git clone $repo_url
|
|
||||||
repo=$(basename $repo_url)
|
|
||||||
|
|
||||||
log "Display test files"
|
|
||||||
tree $repo/
|
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
|
||||||
|
|
||||||
pushd $repo/exp
|
|
||||||
ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
|
|
||||||
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
|
||||||
popd
|
|
||||||
|
|
||||||
log "Install ncnn and pnnx"
|
|
||||||
|
|
||||||
# We are using a modified ncnn here. Will try to merge it to the official repo
|
|
||||||
# of ncnn
|
|
||||||
git clone https://github.com/csukuangfj/ncnn
|
|
||||||
pushd ncnn
|
|
||||||
git submodule init
|
|
||||||
git submodule update python/pybind11
|
|
||||||
python3 setup.py bdist_wheel
|
|
||||||
ls -lh dist/
|
|
||||||
pip install dist/*.whl
|
|
||||||
cd tools/pnnx
|
|
||||||
mkdir build
|
|
||||||
cd build
|
|
||||||
cmake ..
|
|
||||||
make -j4 pnnx
|
|
||||||
|
|
||||||
./src/pnnx || echo "pass"
|
|
||||||
|
|
||||||
popd
|
|
||||||
|
|
||||||
log "Test exporting to pnnx format"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/export.py \
|
|
||||||
--exp-dir $repo/exp \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--epoch 99 \
|
|
||||||
--avg 1 \
|
|
||||||
--use-averaged-model 0 \
|
|
||||||
--pnnx 1
|
|
||||||
|
|
||||||
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
|
||||||
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
|
||||||
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/ncnn-decode.py \
|
|
||||||
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/streaming-ncnn-decode.py \
|
|
||||||
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
|
||||||
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
|
||||||
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
|
||||||
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
log "Test exporting with torch.jit.trace()"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/export.py \
|
|
||||||
--exp-dir $repo/exp \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--epoch 99 \
|
|
||||||
--avg 1 \
|
|
||||||
--use-averaged-model 0 \
|
|
||||||
--jit-trace 1
|
|
||||||
|
|
||||||
log "Decode with models exported by torch.jit.trace()"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/jit_pretrained.py \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-model-filename $repo/exp/encoder_jit_trace.pt \
|
|
||||||
--decoder-model-filename $repo/exp/decoder_jit_trace.pt \
|
|
||||||
--joiner-model-filename $repo/exp/joiner_jit_trace.pt \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0002.wav
|
|
||||||
|
|
||||||
log "Test exporting to ONNX"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/export.py \
|
|
||||||
--exp-dir $repo/exp \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--epoch 99 \
|
|
||||||
--avg 1 \
|
|
||||||
--use-averaged-model 0 \
|
|
||||||
--onnx 1
|
|
||||||
|
|
||||||
log "Decode with ONNX models "
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/streaming-onnx-decode.py \
|
|
||||||
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-model-filename $repo//exp/encoder.onnx \
|
|
||||||
--decoder-model-filename $repo/exp/decoder.onnx \
|
|
||||||
--joiner-model-filename $repo/exp/joiner.onnx \
|
|
||||||
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
|
|
||||||
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/streaming-onnx-decode.py \
|
|
||||||
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-model-filename $repo//exp/encoder.onnx \
|
|
||||||
--decoder-model-filename $repo/exp/decoder.onnx \
|
|
||||||
--joiner-model-filename $repo/exp/joiner.onnx \
|
|
||||||
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
|
|
||||||
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
|
|
||||||
$repo/test_wavs/1221-135766-0001.wav
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/streaming-onnx-decode.py \
|
|
||||||
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-model-filename $repo//exp/encoder.onnx \
|
|
||||||
--decoder-model-filename $repo/exp/decoder.onnx \
|
|
||||||
--joiner-model-filename $repo/exp/joiner.onnx \
|
|
||||||
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
|
|
||||||
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
|
|
||||||
$repo/test_wavs/1221-135766-0002.wav
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
|
||||||
log "Greedy search with --max-sym-per-frame $sym"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/pretrained.py \
|
|
||||||
--method greedy_search \
|
|
||||||
--max-sym-per-frame $sym \
|
|
||||||
--checkpoint $repo/exp/pretrained.pt \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0002.wav
|
|
||||||
done
|
|
||||||
|
|
||||||
for method in modified_beam_search beam_search fast_beam_search; do
|
|
||||||
log "$method"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/pretrained.py \
|
|
||||||
--method $method \
|
|
||||||
--beam-size 4 \
|
|
||||||
--checkpoint $repo/exp/pretrained.pt \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0002.wav
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
|
||||||
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
|
||||||
|
|
||||||
if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"shallow-fusion" ]]; then
|
|
||||||
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
|
||||||
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
|
||||||
git clone $lm_repo_url
|
|
||||||
lm_repo=$(basename $lm_repo_url)
|
|
||||||
pushd $lm_repo
|
|
||||||
git lfs pull --include "exp/pretrained.pt"
|
|
||||||
cd exp
|
|
||||||
ln -s pretrained.pt epoch-88.pt
|
|
||||||
popd
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/decode.py \
|
|
||||||
--use-averaged-model 0 \
|
|
||||||
--epoch 99 \
|
|
||||||
--avg 1 \
|
|
||||||
--exp-dir $repo/exp \
|
|
||||||
--lang-dir $repo/data/lang_bpe_500 \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--max-duration 600 \
|
|
||||||
--decoding-method modified_beam_search_rnnlm_shallow_fusion \
|
|
||||||
--beam 4 \
|
|
||||||
--rnn-lm-scale 0.3 \
|
|
||||||
--rnn-lm-exp-dir $lm_repo/exp \
|
|
||||||
--rnn-lm-epoch 88 \
|
|
||||||
--rnn-lm-avg 1 \
|
|
||||||
--rnn-lm-num-layers 3 \
|
|
||||||
--rnn-lm-tie-weights 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
|
|
||||||
mkdir -p lstm_transducer_stateless2/exp
|
|
||||||
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
|
||||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
|
||||||
|
|
||||||
ls -lh data
|
|
||||||
ls -lh lstm_transducer_stateless2/exp
|
|
||||||
|
|
||||||
log "Decoding test-clean and test-other"
|
|
||||||
|
|
||||||
# use a small value for decoding with CPU
|
|
||||||
max_duration=100
|
|
||||||
|
|
||||||
for method in greedy_search fast_beam_search modified_beam_search; do
|
|
||||||
log "Decoding with $method"
|
|
||||||
|
|
||||||
./lstm_transducer_stateless2/decode.py \
|
|
||||||
--decoding-method $method \
|
|
||||||
--epoch 999 \
|
|
||||||
--avg 1 \
|
|
||||||
--use-averaged-model 0 \
|
|
||||||
--max-duration $max_duration \
|
|
||||||
--exp-dir lstm_transducer_stateless2/exp
|
|
||||||
done
|
|
||||||
|
|
||||||
rm lstm_transducer_stateless2/exp/*.pt
|
|
||||||
fi
|
|
||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
@ -23,7 +23,6 @@ popd
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
|
|||||||
@ -22,7 +22,6 @@ popd
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
@ -27,14 +26,6 @@ ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
|
|||||||
ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
|
ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
|
||||||
popd
|
popd
|
||||||
|
|
||||||
log "Test exporting to ONNX format"
|
|
||||||
|
|
||||||
./pruned_transducer_stateless3/export.py \
|
|
||||||
--exp-dir $repo/exp \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--epoch 99 \
|
|
||||||
--avg 1 \
|
|
||||||
--onnx 1
|
|
||||||
|
|
||||||
log "Export to torchscript model"
|
log "Export to torchscript model"
|
||||||
./pruned_transducer_stateless3/export.py \
|
./pruned_transducer_stateless3/export.py \
|
||||||
@ -51,30 +42,8 @@ log "Export to torchscript model"
|
|||||||
--avg 1 \
|
--avg 1 \
|
||||||
--jit-trace 1
|
--jit-trace 1
|
||||||
|
|
||||||
ls -lh $repo/exp/*.onnx
|
|
||||||
ls -lh $repo/exp/*.pt
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
log "Decode with ONNX models"
|
|
||||||
|
|
||||||
./pruned_transducer_stateless3/onnx_check.py \
|
|
||||||
--jit-filename $repo/exp/cpu_jit.pt \
|
|
||||||
--onnx-encoder-filename $repo/exp/encoder.onnx \
|
|
||||||
--onnx-decoder-filename $repo/exp/decoder.onnx \
|
|
||||||
--onnx-joiner-filename $repo/exp/joiner.onnx \
|
|
||||||
--onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj.onnx \
|
|
||||||
--onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj.onnx
|
|
||||||
|
|
||||||
./pruned_transducer_stateless3/onnx_pretrained.py \
|
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
|
||||||
--encoder-model-filename $repo/exp/encoder.onnx \
|
|
||||||
--decoder-model-filename $repo/exp/decoder.onnx \
|
|
||||||
--joiner-model-filename $repo/exp/joiner.onnx \
|
|
||||||
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
|
|
||||||
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
|
|
||||||
$repo/test_wavs/1089-134686-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0001.wav \
|
|
||||||
$repo/test_wavs/1221-135766-0002.wav
|
|
||||||
|
|
||||||
log "Decode with models exported by torch.jit.trace()"
|
log "Decode with models exported by torch.jit.trace()"
|
||||||
|
|
||||||
./pruned_transducer_stateless3/jit_pretrained.py \
|
./pruned_transducer_stateless3/jit_pretrained.py \
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
@ -33,6 +32,7 @@ popd
|
|||||||
log "Export to torchscript model"
|
log "Export to torchscript model"
|
||||||
./pruned_transducer_stateless7/export.py \
|
./pruned_transducer_stateless7/export.py \
|
||||||
--exp-dir $repo/exp \
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model false \
|
||||||
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
--epoch 99 \
|
--epoch 99 \
|
||||||
--avg 1 \
|
--avg 1 \
|
||||||
|
|||||||
150
.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh
vendored
Executable file
@ -0,0 +1,150 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo/exp
|
||||||
|
git lfs pull --include "data/lang_bpe_500/HLG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/L.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/LG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "data/lm/G_4_gram.pt"
|
||||||
|
git lfs pull --include "exp/cpu_jit.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
ls -lh *.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export to torchscript model"
|
||||||
|
./pruned_transducer_stateless7_ctc/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model false \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--nn-model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
|
||||||
|
--model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--G $repo/data/lm/G_4_gram.pt \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for sym in 1 2 3; do
|
||||||
|
log "Greedy search with --max-sym-per-frame $sym"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame $sym \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for method in modified_beam_search beam_search fast_beam_search; do
|
||||||
|
log "$method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc/pretrained.py \
|
||||||
|
--method $method \
|
||||||
|
--beam-size 4 \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--G $repo/data/lm/G_4_gram.pt \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
|
mkdir -p pruned_transducer_stateless7_ctc/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_ctc/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh pruned_transducer_stateless7_ctc/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
|
||||||
|
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc/decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--exp-dir pruned_transducer_stateless7_ctc/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./pruned_transducer_stateless7_ctc/ctc_decode.py \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir ./pruned_transducer_stateless7_ctc/exp \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--decoding-method $m \
|
||||||
|
--hlg-scale 0.6 \
|
||||||
|
--lm-dir data/lm
|
||||||
|
done
|
||||||
|
|
||||||
|
rm pruned_transducer_stateless7_ctc/exp/*.pt
|
||||||
|
fi
|
||||||
147
.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh
vendored
Executable file
@ -0,0 +1,147 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo/exp
|
||||||
|
git lfs pull --include "data/lang_bpe_500/HLG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/L.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/LG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/cpu_jit.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
ls -lh *.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export to torchscript model"
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model false \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--nn-model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
|
||||||
|
--model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for sym in 1 2 3; do
|
||||||
|
log "Greedy search with --max-sym-per-frame $sym"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame $sym \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for method in modified_beam_search beam_search fast_beam_search; do
|
||||||
|
log "$method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/pretrained.py \
|
||||||
|
--method $method \
|
||||||
|
--beam-size 4 \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--words-file $repo/data/lang_bpe_500/words.txt \
|
||||||
|
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--method $m \
|
||||||
|
--sample-rate 16000 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
|
mkdir -p pruned_transducer_stateless7_ctc_bs/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_ctc_bs/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh pruned_transducer_stateless7_ctc_bs/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
|
||||||
|
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--exp-dir pruned_transducer_stateless7_ctc_bs/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
for m in ctc-decoding 1best; do
|
||||||
|
./pruned_transducer_stateless7_ctc_bs/ctc_decode.py \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--decoding-method $m \
|
||||||
|
--hlg-scale 0.6
|
||||||
|
done
|
||||||
|
|
||||||
|
rm pruned_transducer_stateless7_ctc_bs/exp/*.pt
|
||||||
|
fi
|
||||||
148
.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh
vendored
Executable file
@ -0,0 +1,148 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
git lfs install
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/cpu_jit.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
git lfs pull --include "exp/encoder_jit_trace.pt"
|
||||||
|
git lfs pull --include "exp/decoder_jit_trace.pt"
|
||||||
|
git lfs pull --include "exp/joiner_jit_trace.pt"
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
ls -lh *.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export to torchscript model"
|
||||||
|
./pruned_transducer_stateless7_streaming/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model false \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--nn-model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
log "Export to torchscript model by torch.jit.trace()"
|
||||||
|
./pruned_transducer_stateless7_streaming/jit_trace_export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model false \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.trace()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--encoder-model-filename $repo/exp/encoder_jit_trace.pt \
|
||||||
|
--decoder-model-filename $repo/exp/decoder_jit_trace.pt \
|
||||||
|
--joiner-model-filename $repo/exp/joiner_jit_trace.pt \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
for sym in 1 2 3; do
|
||||||
|
log "Greedy search with --max-sym-per-frame $sym"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame $sym \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for method in modified_beam_search beam_search fast_beam_search; do
|
||||||
|
log "$method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/pretrained.py \
|
||||||
|
--method $method \
|
||||||
|
--beam-size 4 \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
|
mkdir -p pruned_transducer_stateless7_streaming/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_streaming/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh pruned_transducer_stateless7_streaming/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
num_decode_stream=200
|
||||||
|
|
||||||
|
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||||
|
log "decoding with $method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--exp-dir pruned_transducer_stateless7_streaming/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/streaming_decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--num-decode-streams $num_decode_stream \
|
||||||
|
--exp-dir pruned_transducer_stateless7_streaming/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
rm pruned_transducer_stateless7_streaming/exp/*.pt
|
||||||
|
fi
|
||||||
115
.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh
vendored
Executable file
@ -0,0 +1,115 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
git lfs install
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo/exp
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/cpu_jit.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
ls -lh *.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless8/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--nn-model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
log "Export to torchscript model"
|
||||||
|
./pruned_transducer_stateless8/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model false \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless8/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--nn-model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
for sym in 1 2 3; do
|
||||||
|
log "Greedy search with --max-sym-per-frame $sym"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless8/pretrained.py \
|
||||||
|
--method greedy_search \
|
||||||
|
--max-sym-per-frame $sym \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
for method in modified_beam_search beam_search fast_beam_search; do
|
||||||
|
log "$method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless8/pretrained.py \
|
||||||
|
--method $method \
|
||||||
|
--beam-size 4 \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
|
mkdir -p pruned_transducer_stateless8/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless8/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh pruned_transducer_stateless8/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
|
||||||
|
for method in greedy_search fast_beam_search modified_beam_search; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless8/decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--exp-dir pruned_transducer_stateless8/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
rm pruned_transducer_stateless8/exp/*.pt
|
||||||
|
fi
|
||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
102
.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh
vendored
Executable file
@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08
|
||||||
|
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
log "Display test files"
|
||||||
|
tree $repo/
|
||||||
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
|
pushd $repo/exp
|
||||||
|
git lfs pull --include "data/lang_bpe_500/3gram.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/4gram.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/L.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/LG.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/cpu_jit.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
ls -lh *.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export to torchscript model"
|
||||||
|
./zipformer_mmi/export.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model false \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
ls -lh $repo/exp/*.pt
|
||||||
|
|
||||||
|
log "Decode with models exported by torch.jit.script()"
|
||||||
|
|
||||||
|
./zipformer_mmi/jit_pretrained.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--nn-model-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--lang-dir $repo/data/lang_bpe_500 \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
|
||||||
|
log "$method"
|
||||||
|
|
||||||
|
./zipformer_mmi/pretrained.py \
|
||||||
|
--method $method \
|
||||||
|
--checkpoint $repo/exp/pretrained.pt \
|
||||||
|
--lang-dir $repo/data/lang_bpe_500 \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
done
|
||||||
|
|
||||||
|
|
||||||
|
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||||
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
|
mkdir -p zipformer_mmi/exp
|
||||||
|
ln -s $PWD/$repo/exp/pretrained.pt zipformer_mmi/exp/epoch-999.pt
|
||||||
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
|
ls -lh data
|
||||||
|
ls -lh zipformer_mmi/exp
|
||||||
|
|
||||||
|
log "Decoding test-clean and test-other"
|
||||||
|
|
||||||
|
# use a small value for decoding with CPU
|
||||||
|
max_duration=100
|
||||||
|
|
||||||
|
for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
|
||||||
|
log "Decoding with $method"
|
||||||
|
|
||||||
|
./zipformer_mmi/decode.py \
|
||||||
|
--decoding-method $method \
|
||||||
|
--epoch 999 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--nbest-scale 1.2 \
|
||||||
|
--hp-scale 1.0 \
|
||||||
|
--max-duration $max_duration \
|
||||||
|
--lang-dir $repo/data/lang_bpe_500 \
|
||||||
|
--exp-dir zipformer_mmi/exp
|
||||||
|
done
|
||||||
|
|
||||||
|
rm zipformer_mmi/exp/*.pt
|
||||||
|
fi
|
||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.flac
|
|
||||||
ls -lh $repo/test_wavs/*.flac
|
ls -lh $repo/test_wavs/*.flac
|
||||||
|
|
||||||
log "CTC decoding"
|
log "CTC decoding"
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
for sym in 1 2 3; do
|
for sym in 1 2 3; do
|
||||||
|
|||||||
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
log "Beam search decoding"
|
log "Beam search decoding"
|
||||||
|
|||||||
@ -20,7 +20,6 @@ repo=$(basename $repo_url)
|
|||||||
|
|
||||||
log "Display test files"
|
log "Display test files"
|
||||||
tree $repo/
|
tree $repo/
|
||||||
soxi $repo/test_wavs/*.wav
|
|
||||||
ls -lh $repo/test_wavs/*.wav
|
ls -lh $repo/test_wavs/*.wav
|
||||||
|
|
||||||
pushd $repo/exp
|
pushd $repo/exp
|
||||||
|
|||||||
234
.github/scripts/test-ncnn-export.sh
vendored
Executable file
@ -0,0 +1,234 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
pushd egs/librispeech/ASR
|
||||||
|
|
||||||
|
log "Install ncnn and pnnx"
|
||||||
|
|
||||||
|
# We are using a modified ncnn here. Will try to merge it to the official repo
|
||||||
|
# of ncnn
|
||||||
|
git clone https://github.com/csukuangfj/ncnn
|
||||||
|
pushd ncnn
|
||||||
|
git submodule init
|
||||||
|
git submodule update python/pybind11
|
||||||
|
python3 setup.py bdist_wheel
|
||||||
|
ls -lh dist/
|
||||||
|
pip install dist/*.whl
|
||||||
|
cd tools/pnnx
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
|
||||||
|
echo "which python3"
|
||||||
|
|
||||||
|
which python3
|
||||||
|
#/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3
|
||||||
|
|
||||||
|
cmake -D Python3_EXECUTABLE=$(which python3) ..
|
||||||
|
make -j4 pnnx
|
||||||
|
|
||||||
|
./src/pnnx || echo "pass"
|
||||||
|
|
||||||
|
popd
|
||||||
|
|
||||||
|
export PATH=$PWD/ncnn/tools/pnnx/build/src:$PATH
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.trace()"
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
\
|
||||||
|
--num-encoder-layers 12 \
|
||||||
|
--chunk-length 32 \
|
||||||
|
--cnn-module-kernel 31 \
|
||||||
|
--left-context-length 32 \
|
||||||
|
--right-context-length 8 \
|
||||||
|
--memory-size 32
|
||||||
|
|
||||||
|
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.trace()"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/export-for-ncnn.py \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0
|
||||||
|
|
||||||
|
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
python3 ./lstm_transducer_stateless2/ncnn-decode.py \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
\
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--num-encoder-layers "2,4,3,2,4" \
|
||||||
|
--feedforward-dims "1024,1024,2048,2048,1024" \
|
||||||
|
--nhead "8,8,8,8,8" \
|
||||||
|
--encoder-dims "384,384,384,384,384" \
|
||||||
|
--attention-dims "192,192,192,192,192" \
|
||||||
|
--encoder-unmasked-dims "256,256,256,256,256" \
|
||||||
|
--zipformer-downsampling-factors "1,2,4,8,2" \
|
||||||
|
--cnn-module-kernels "31,31,31,31,31" \
|
||||||
|
--decoder-dim 512 \
|
||||||
|
--joiner-dim 512
|
||||||
|
|
||||||
|
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_char_bpe/L.pt"
|
||||||
|
git lfs pull --include "data/lang_char_bpe/L_disambig.pt"
|
||||||
|
git lfs pull --include "data/lang_char_bpe/Linv.pt"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
|
||||||
|
--lang-dir $repo/data/lang_char_bpe \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--num-encoder-layers "2,4,3,2,4" \
|
||||||
|
--feedforward-dims "1024,1024,1536,1536,1024" \
|
||||||
|
--nhead "8,8,8,8,8" \
|
||||||
|
--encoder-dims "384,384,384,384,384" \
|
||||||
|
--attention-dims "192,192,192,192,192" \
|
||||||
|
--encoder-unmasked-dims "256,256,256,256,256" \
|
||||||
|
--zipformer-downsampling-factors "1,2,4,8,2" \
|
||||||
|
--cnn-module-kernels "31,31,31,31,31" \
|
||||||
|
--decoder-dim 512 \
|
||||||
|
--joiner-dim 512
|
||||||
|
|
||||||
|
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
|
||||||
|
--tokens $repo/data/lang_char_bpe/tokens.txt \
|
||||||
|
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
$repo/test_wavs/0.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
351
.github/scripts/test-onnx-export.sh
vendored
Executable file
@ -0,0 +1,351 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
log() {
|
||||||
|
# This function is from espnet
|
||||||
|
local fname=${BASH_SOURCE[1]##*/}
|
||||||
|
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
git lfs install
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.trace()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/jit_trace_export.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--exp-dir $repo/exp/
|
||||||
|
|
||||||
|
log "Test exporting to ONNX format"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/export-onnx.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--exp-dir $repo/exp/
|
||||||
|
|
||||||
|
ls -lh $repo/exp
|
||||||
|
|
||||||
|
log "Run onnx_check.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/onnx_check.py \
|
||||||
|
--jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
|
||||||
|
--jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
|
||||||
|
--jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
|
||||||
|
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
||||||
|
|
||||||
|
log "Run onnx_pretrained.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
|
||||||
|
log "Downloading pre-trained model from $repo_url"
|
||||||
|
git lfs install
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless3/export.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 9999 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp/ \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
log "Test exporting to ONNX format"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless3/export-onnx.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 9999 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp/
|
||||||
|
|
||||||
|
ls -lh $repo/exp
|
||||||
|
|
||||||
|
log "Run onnx_check.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless3/onnx_check.py \
|
||||||
|
--jit-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--onnx-encoder-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
|
||||||
|
--onnx-decoder-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
|
||||||
|
--onnx-joiner-filename $repo/exp/joiner-epoch-9999-avg-1.onnx
|
||||||
|
|
||||||
|
log "Run onnx_pretrained.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless3/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained-epoch-39-avg-7.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless5/export.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--num-encoder-layers 18 \
|
||||||
|
--dim-feedforward 2048 \
|
||||||
|
--nhead 8 \
|
||||||
|
--encoder-dim 512 \
|
||||||
|
--decoder-dim 512 \
|
||||||
|
--joiner-dim 512 \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
log "Test exporting to ONNX format"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless5/export-onnx.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--num-encoder-layers 18 \
|
||||||
|
--dim-feedforward 2048 \
|
||||||
|
--nhead 8 \
|
||||||
|
--encoder-dim 512 \
|
||||||
|
--decoder-dim 512 \
|
||||||
|
--joiner-dim 512
|
||||||
|
|
||||||
|
ls -lh $repo/exp
|
||||||
|
|
||||||
|
log "Run onnx_check.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless5/onnx_check.py \
|
||||||
|
--jit-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
||||||
|
|
||||||
|
log "Run onnx_pretrained.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless5/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.script()"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7/export.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--feedforward-dims "1024,1024,2048,2048,1024" \
|
||||||
|
--jit 1
|
||||||
|
|
||||||
|
log "Test exporting to ONNX format"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7/export-onnx.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--feedforward-dims "1024,1024,2048,2048,1024"
|
||||||
|
|
||||||
|
ls -lh $repo/exp
|
||||||
|
|
||||||
|
log "Run onnx_check.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7/onnx_check.py \
|
||||||
|
--jit-filename $repo/exp/cpu_jit.pt \
|
||||||
|
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
||||||
|
|
||||||
|
log "Run onnx_pretrained.py"
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Test exporting to ONNX format"
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/export-onnx.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp \
|
||||||
|
--num-encoder-layers 12 \
|
||||||
|
--chunk-length 32 \
|
||||||
|
--cnn-module-kernel 31 \
|
||||||
|
--left-context-length 32 \
|
||||||
|
--right-context-length 8 \
|
||||||
|
--memory-size 32
|
||||||
|
|
||||||
|
log "Run onnx_pretrained.py"
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "=========================================================================="
|
||||||
|
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
|
pushd $repo
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
|
||||||
|
|
||||||
|
cd exp
|
||||||
|
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
|
log "Export via torch.jit.trace()"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/export.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp/ \
|
||||||
|
--jit-trace 1
|
||||||
|
|
||||||
|
log "Test exporting to ONNX format"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/export-onnx.py \
|
||||||
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp
|
||||||
|
|
||||||
|
ls -lh $repo/exp
|
||||||
|
|
||||||
|
log "Run onnx_check.py"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/onnx_check.py \
|
||||||
|
--jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
|
||||||
|
--jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
|
||||||
|
--jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
|
||||||
|
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
||||||
|
|
||||||
|
log "Run onnx_pretrained.py"
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav
|
||||||
|
|
||||||
|
rm -rf $repo
|
||||||
|
log "--------------------------------------------------------------------------"
|
||||||
4
.github/workflows/build-doc.yml
vendored
@ -26,6 +26,10 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: build_doc-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-doc:
|
build-doc:
|
||||||
if: github.event.label.name == 'doc' || github.event_name == 'push'
|
if: github.event.label.name == 'doc' || github.event_name == 'push'
|
||||||
|
|||||||
8
.github/workflows/run-aishell-2022-06-20.yml
vendored
@ -34,6 +34,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_aishell_2022_06_20-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_aishell_2022_06_20:
|
run_aishell_2022_06_20:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -61,7 +65,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -83,7 +87,7 @@ jobs:
|
|||||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_gigaspeech_2022_05_13-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_gigaspeech_2022_05_13:
|
run_gigaspeech_2022_05_13:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_03_12-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_2022_03_12:
|
run_librispeech_2022_03_12:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_04_29-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_2022_04_29:
|
run_librispeech_2022_04_29:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_05_13-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_2022_05_13:
|
run_librispeech_2022_05_13:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_11_11_zipformer-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_2022_11_11_zipformer:
|
run_librispeech_2022_11_11_zipformer:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
159
.github/workflows/run-librispeech-2022-11-14-stateless8.yml
vendored
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
|
||||||
|
|
||||||
|
# See ../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
name: run-librispeech-2022-11-14-stateless8
|
||||||
|
# zipformer
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_11_14_zipformer_stateless8-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_librispeech_2022_11_14_zipformer_stateless8:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other datasets
|
||||||
|
id: libri-test-clean-and-test-other-data
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/download
|
||||||
|
key: cache-libri-test-clean-and-test-other
|
||||||
|
|
||||||
|
- name: Download LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
||||||
|
|
||||||
|
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other fbank features
|
||||||
|
id: libri-test-clean-and-test-other-fbank
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/fbank-libri
|
||||||
|
key: cache-libri-fbank-test-clean-and-test-other-v2
|
||||||
|
|
||||||
|
- name: Compute fbank for LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
||||||
|
|
||||||
|
- name: Inference with pre-trained model
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
mkdir -p egs/librispeech/ASR/data
|
||||||
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
|
sudo apt-get -qq install git-lfs tree
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh
|
||||||
|
|
||||||
|
- name: Display decoding results for librispeech pruned_transducer_stateless8
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
tree ./pruned_transducer_stateless8/exp
|
||||||
|
|
||||||
|
cd pruned_transducer_stateless8
|
||||||
|
echo "results for pruned_transducer_stateless8"
|
||||||
|
echo "===greedy search==="
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===fast_beam_search==="
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===modified beam search==="
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
- name: Upload decoding results for librispeech pruned_transducer_stateless8
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
with:
|
||||||
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless8-2022-11-14
|
||||||
|
path: egs/librispeech/ASR/pruned_transducer_stateless8/exp/
|
||||||
163
.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml
vendored
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
|
||||||
|
|
||||||
|
# See ../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
name: run-librispeech-2022-12-01-stateless7-ctc
|
||||||
|
# zipformer
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_librispeech_2022_11_11_zipformer:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other datasets
|
||||||
|
id: libri-test-clean-and-test-other-data
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/download
|
||||||
|
key: cache-libri-test-clean-and-test-other
|
||||||
|
|
||||||
|
- name: Download LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
||||||
|
|
||||||
|
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other fbank features
|
||||||
|
id: libri-test-clean-and-test-other-fbank
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/fbank-libri
|
||||||
|
key: cache-libri-fbank-test-clean-and-test-other-v2
|
||||||
|
|
||||||
|
- name: Compute fbank for LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
||||||
|
|
||||||
|
- name: Inference with pre-trained model
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
mkdir -p egs/librispeech/ASR/data
|
||||||
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
|
sudo apt-get -qq install git-lfs tree
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh
|
||||||
|
|
||||||
|
- name: Display decoding results for librispeech pruned_transducer_stateless7_ctc
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
tree ./pruned_transducer_stateless7_ctc/exp
|
||||||
|
|
||||||
|
cd pruned_transducer_stateless7_ctc
|
||||||
|
echo "results for pruned_transducer_stateless7_ctc"
|
||||||
|
echo "===greedy search==="
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===fast_beam_search==="
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===modified beam search==="
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===ctc decoding==="
|
||||||
|
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===1best==="
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
- name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
with:
|
||||||
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-ctc-2022-12-01
|
||||||
|
path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc/exp/
|
||||||
167
.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml
vendored
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
# Copyright 2022 Zengwei Yao
|
||||||
|
|
||||||
|
# See ../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
name: run-librispeech-2022-12-08-zipformer-mmi
|
||||||
|
# zipformer
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_12_08_zipformer-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_librispeech_2022_12_08_zipformer:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other datasets
|
||||||
|
id: libri-test-clean-and-test-other-data
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/download
|
||||||
|
key: cache-libri-test-clean-and-test-other
|
||||||
|
|
||||||
|
- name: Download LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
||||||
|
|
||||||
|
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other fbank features
|
||||||
|
id: libri-test-clean-and-test-other-fbank
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/fbank-libri
|
||||||
|
key: cache-libri-fbank-test-clean-and-test-other-v2
|
||||||
|
|
||||||
|
- name: Compute fbank for LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
||||||
|
|
||||||
|
- name: Inference with pre-trained model
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
mkdir -p egs/librispeech/ASR/data
|
||||||
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
|
sudo apt-get -qq install git-lfs tree
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh
|
||||||
|
|
||||||
|
- name: Display decoding results for librispeech zipformer-mmi
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
tree ./zipformer-mmi/exp
|
||||||
|
|
||||||
|
cd zipformer-mmi
|
||||||
|
echo "results for zipformer-mmi"
|
||||||
|
echo "===1best==="
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===nbest==="
|
||||||
|
find exp/nbest -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/nbest -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===nbest-rescoring-LG==="
|
||||||
|
find exp/nbest-rescoring-LG -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/nbest-rescoring-LG -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===nbest-rescoring-3-gram==="
|
||||||
|
find exp/nbest-rescoring-3-gram -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/nbest-rescoring-3-gram -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===nbest-rescoring-4-gram==="
|
||||||
|
find exp/nbest-rescoring-4-gram -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/nbest-rescoring-4-gram -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
- name: Upload decoding results for librispeech zipformer-mmi
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
with:
|
||||||
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer_mmi-2022-12-08
|
||||||
|
path: egs/librispeech/ASR/zipformer_mmi/exp/
|
||||||
163
.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml
vendored
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
|
||||||
|
|
||||||
|
# See ../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
name: run-librispeech-2022-12-15-stateless7-ctc-bs
|
||||||
|
# zipformer
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_librispeech_2022_12_15_zipformer_ctc_bs:
|
||||||
|
if: github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other datasets
|
||||||
|
id: libri-test-clean-and-test-other-data
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/download
|
||||||
|
key: cache-libri-test-clean-and-test-other
|
||||||
|
|
||||||
|
- name: Download LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
||||||
|
|
||||||
|
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other fbank features
|
||||||
|
id: libri-test-clean-and-test-other-fbank
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/fbank-libri
|
||||||
|
key: cache-libri-fbank-test-clean-and-test-other-v2
|
||||||
|
|
||||||
|
- name: Compute fbank for LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
||||||
|
|
||||||
|
- name: Inference with pre-trained model
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
mkdir -p egs/librispeech/ASR/data
|
||||||
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
|
sudo apt-get -qq install git-lfs tree
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh
|
||||||
|
|
||||||
|
- name: Display decoding results for librispeech pruned_transducer_stateless7_ctc_bs
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
tree ./pruned_transducer_stateless7_ctc_bs/exp
|
||||||
|
|
||||||
|
cd pruned_transducer_stateless7_ctc_bs
|
||||||
|
echo "results for pruned_transducer_stateless7_ctc_bs"
|
||||||
|
echo "===greedy search==="
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===fast_beam_search==="
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===modified beam search==="
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===ctc decoding==="
|
||||||
|
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===1best==="
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
- name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc_bs
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
with:
|
||||||
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-ctc-bs-2022-12-15
|
||||||
|
path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/exp/
|
||||||
172
.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml
vendored
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
|
||||||
|
|
||||||
|
# See ../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
name: run-librispeech-2022-12-29-stateless7-streaming
|
||||||
|
# streaming zipformer (pruned_transducer_stateless7_streaming)
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_12_29_zipformer_streaming-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_librispeech_2022_12_29_zipformer_streaming:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event.label.name == 'streaming-zipformer' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other datasets
|
||||||
|
id: libri-test-clean-and-test-other-data
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/download
|
||||||
|
key: cache-libri-test-clean-and-test-other
|
||||||
|
|
||||||
|
- name: Download LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
||||||
|
|
||||||
|
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other fbank features
|
||||||
|
id: libri-test-clean-and-test-other-fbank
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/fbank-libri
|
||||||
|
key: cache-libri-fbank-test-clean-and-test-other-v2
|
||||||
|
|
||||||
|
- name: Compute fbank for LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
||||||
|
|
||||||
|
- name: Inference with pre-trained model
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
mkdir -p egs/librispeech/ASR/data
|
||||||
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
|
sudo apt-get -qq install git-lfs tree
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh
|
||||||
|
|
||||||
|
- name: Display decoding results for librispeech pruned_transducer_stateless7_streaming
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
tree ./pruned_transducer_stateless7_streaming/exp
|
||||||
|
|
||||||
|
cd pruned_transducer_stateless7_streaming
|
||||||
|
echo "results for pruned_transducer_stateless7_streaming"
|
||||||
|
echo "===greedy search==="
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===fast_beam_search==="
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===modified beam search==="
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===streaming greedy search==="
|
||||||
|
find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===streaming fast_beam_search==="
|
||||||
|
find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===streaming modified beam search==="
|
||||||
|
find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
|
||||||
|
- name: Upload decoding results for librispeech pruned_transducer_stateless7_streaming
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
with:
|
||||||
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-streaming-2022-12-29
|
||||||
|
path: egs/librispeech/ASR/pruned_transducer_stateless7_streaming/exp/
|
||||||
155
.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml
vendored
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
|
||||||
|
|
||||||
|
# See ../../LICENSE for clarification regarding multiple authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
name: run-librispeech-conformer-ctc3-2022-11-28
|
||||||
|
# conformer_ctc3
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_11_28_conformer_ctc3-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_librispeech_2022_11_28_conformer_ctc3:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other datasets
|
||||||
|
id: libri-test-clean-and-test-other-data
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/download
|
||||||
|
key: cache-libri-test-clean-and-test-other
|
||||||
|
|
||||||
|
- name: Download LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
||||||
|
|
||||||
|
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
||||||
|
|
||||||
|
- name: Cache LibriSpeech test-clean and test-other fbank features
|
||||||
|
id: libri-test-clean-and-test-other-fbank
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/fbank-libri
|
||||||
|
key: cache-libri-fbank-test-clean-and-test-other-v2
|
||||||
|
|
||||||
|
- name: Compute fbank for LibriSpeech test-clean and test-other
|
||||||
|
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
||||||
|
|
||||||
|
- name: Inference with pre-trained model
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
mkdir -p egs/librispeech/ASR/data
|
||||||
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
|
sudo apt-get -qq install git-lfs tree
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh
|
||||||
|
|
||||||
|
- name: Display decoding results for librispeech conformer_ctc3
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR/
|
||||||
|
tree ./conformer_ctc3/exp
|
||||||
|
|
||||||
|
cd conformer_ctc3
|
||||||
|
echo "results for conformer_ctc3"
|
||||||
|
echo "===ctc-decoding==="
|
||||||
|
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
echo "===1best==="
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
- name: Upload decoding results for librispeech conformer_ctc3
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
|
with:
|
||||||
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-conformer_ctc3-2022-11-28
|
||||||
|
path: egs/librispeech/ASR/conformer_ctc3/exp/
|
||||||
@ -16,9 +16,13 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_lstm_transducer_stateless2_2022_09_03:
|
run_librispeech_lstm_transducer_stateless2_2022_09_03:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'ncnn' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
@ -43,7 +47,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -102,12 +106,12 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
|
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
|
||||||
|
|
||||||
- name: Display decoding results for lstm_transducer_stateless2
|
- name: Display decoding results for lstm_transducer_stateless2
|
||||||
if: github.event_name == 'schedule'
|
if: github.event_name == 'schedule'
|
||||||
@ -135,13 +139,25 @@ jobs:
|
|||||||
cd egs/librispeech/ASR
|
cd egs/librispeech/ASR
|
||||||
tree lstm_transducer_stateless2/exp
|
tree lstm_transducer_stateless2/exp
|
||||||
cd lstm_transducer_stateless2/exp
|
cd lstm_transducer_stateless2/exp
|
||||||
echo "===modified_beam_search_rnnlm_shallow_fusion==="
|
echo "===modified_beam_search_lm_shallow_fusion==="
|
||||||
find modified_beam_search_rnnlm_shallow_fusion -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
echo "===Using RNNLM==="
|
||||||
find modified_beam_search_rnnlm_shallow_fusion -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
|
- name: Display decoding results for lstm_transducer_stateless2
|
||||||
|
if: github.event.label.name == 'LODR'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
tree lstm_transducer_stateless2/exp
|
||||||
|
cd lstm_transducer_stateless2/exp
|
||||||
|
echo "===modified_beam_search_rnnlm_LODR==="
|
||||||
|
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
||||||
|
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||||
|
|
||||||
- name: Upload decoding results for lstm_transducer_stateless2
|
- name: Upload decoding results for lstm_transducer_stateless2
|
||||||
uses: actions/upload-artifact@v2
|
uses: actions/upload-artifact@v2
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion'
|
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR'
|
||||||
with:
|
with:
|
||||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-lstm_transducer_stateless2-2022-09-03
|
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-lstm_transducer_stateless2-2022-09-03
|
||||||
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
|
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
|
||||||
|
|||||||
@ -33,9 +33,13 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_pruned_transducer_stateless3_2022_05_13-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_pruned_transducer_stateless3_2022_05_13:
|
run_librispeech_pruned_transducer_stateless3_2022_05_13:
|
||||||
if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_streaming_2022_06_26-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_streaming_2022_06_26:
|
run_librispeech_streaming_2022_06_26:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -33,6 +33,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_librispeech_2022_04_19-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_2022_04_19:
|
run_librispeech_2022_04_19:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -60,7 +64,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -119,7 +123,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -23,6 +23,10 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_conformer_ctc-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_conformer_ctc:
|
run_pre_trained_conformer_ctc:
|
||||||
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
||||||
@ -50,7 +54,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -69,7 +73,7 @@ jobs:
|
|||||||
- name: Inference with pre-trained model
|
- name: Inference with pre-trained model
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -32,6 +32,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h:
|
run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -59,7 +63,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -118,7 +122,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -32,6 +32,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h:
|
run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -59,7 +63,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -118,7 +122,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -23,6 +23,10 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_transducer_stateless_modified_2_aishell-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_transducer_stateless_modified_2_aishell:
|
run_pre_trained_transducer_stateless_modified_2_aishell:
|
||||||
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
||||||
@ -50,7 +54,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -69,7 +73,7 @@ jobs:
|
|||||||
- name: Inference with pre-trained model
|
- name: Inference with pre-trained model
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -23,6 +23,10 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_transducer_stateless_modified_aishell-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_transducer_stateless_modified_aishell:
|
run_pre_trained_transducer_stateless_modified_aishell:
|
||||||
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
||||||
@ -50,7 +54,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -69,7 +73,7 @@ jobs:
|
|||||||
- name: Inference with pre-trained model
|
- name: Inference with pre-trained model
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -32,6 +32,10 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_transducer_stateless-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_transducer_stateless:
|
run_pre_trained_transducer_stateless:
|
||||||
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
@ -59,7 +63,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -118,7 +122,7 @@ jobs:
|
|||||||
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
||||||
ls -lh egs/librispeech/ASR/data/*
|
ls -lh egs/librispeech/ASR/data/*
|
||||||
|
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
@ -23,6 +23,10 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_pre_trained_transducer-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_pre_trained_transducer:
|
run_pre_trained_transducer:
|
||||||
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
||||||
@ -50,7 +54,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -69,7 +73,7 @@ jobs:
|
|||||||
- name: Inference with pre-trained model
|
- name: Inference with pre-trained model
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
71
.github/workflows/run-ptb-rnn-lm.yml
vendored
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
name: run-ptb-rnn-lm-training
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_ptb_rnn_lm_training-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
run_ptb_rnn_lm_training:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: ["3.8"]
|
||||||
|
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Prepare data
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
cd egs/ptb/LM
|
||||||
|
./prepare.sh
|
||||||
|
|
||||||
|
- name: Run training
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
cd egs/ptb/LM
|
||||||
|
./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2
|
||||||
|
|
||||||
|
- name: Upload pretrained models
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
with:
|
||||||
|
name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
|
||||||
|
path: egs/ptb/LM/my-rnnlm-exp/
|
||||||
@ -23,8 +23,12 @@ on:
|
|||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run_librispeech_pruned_transducer_stateless3_2022_05_13:
|
run_wenetspeech_pruned_transducer_stateless2:
|
||||||
if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'wenetspeech'
|
if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'wenetspeech'
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
@ -50,7 +54,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Cache kaldifeat
|
- name: Cache kaldifeat
|
||||||
id: my-cache
|
id: my-cache
|
||||||
@ -72,7 +76,7 @@ jobs:
|
|||||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
run: |
|
run: |
|
||||||
sudo apt-get -qq install git-lfs tree sox
|
sudo apt-get -qq install git-lfs tree
|
||||||
export PYTHONPATH=$PWD:$PYTHONPATH
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|||||||
14
.github/workflows/run-yesno-recipe.yml
vendored
@ -21,17 +21,21 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: run-yesno-recipe-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
run-yesno-recipe:
|
run-yesno-recipe:
|
||||||
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
# os: [ubuntu-18.04, macos-10.15]
|
# os: [ubuntu-18.04, macos-10.15]
|
||||||
# TODO: enable macOS for CPU testing
|
# TODO: enable macOS for CPU testing
|
||||||
os: [ubuntu-18.04]
|
os: [ubuntu-latest]
|
||||||
python-version: [3.8]
|
python-version: [3.8]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
|
|
||||||
@ -61,9 +65,9 @@ jobs:
|
|||||||
|
|
||||||
- name: Install Python dependencies
|
- name: Install Python dependencies
|
||||||
run: |
|
run: |
|
||||||
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
- name: Run yesno recipe
|
- name: Run yesno recipe
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
15
.github/workflows/style_check.yml
vendored
@ -24,6 +24,10 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: style_check-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
style_check:
|
style_check:
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
@ -45,17 +49,18 @@ jobs:
|
|||||||
|
|
||||||
- name: Install Python dependencies
|
- name: Install Python dependencies
|
||||||
run: |
|
run: |
|
||||||
python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4
|
python3 -m pip install --upgrade pip black==22.3.0 flake8==5.0.4 click==8.1.0
|
||||||
# See https://github.com/psf/black/issues/2964
|
# Click issue fixed in https://github.com/psf/black/pull/2966
|
||||||
# The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4
|
|
||||||
|
|
||||||
- name: Run flake8
|
- name: Run flake8
|
||||||
shell: bash
|
shell: bash
|
||||||
working-directory: ${{github.workspace}}
|
working-directory: ${{github.workspace}}
|
||||||
run: |
|
run: |
|
||||||
# stop the build if there are Python syntax errors or undefined names
|
# stop the build if there are Python syntax errors or undefined names
|
||||||
flake8 . --count --show-source --statistics
|
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||||
flake8 .
|
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||||
|
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 \
|
||||||
|
--statistics --extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503
|
||||||
|
|
||||||
- name: Run black
|
- name: Run black
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
75
.github/workflows/test-ncnn-export.yml
vendored
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
name: test-ncnn-export
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: test_ncnn_export-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test_ncnn_export:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'ncnn' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Test ncnn export
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/test-ncnn-export.sh
|
||||||
75
.github/workflows/test-onnx-export.yml
vendored
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
name: test-onnx-export
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
pull_request:
|
||||||
|
types: [labeled]
|
||||||
|
|
||||||
|
schedule:
|
||||||
|
# minute (0-59)
|
||||||
|
# hour (0-23)
|
||||||
|
# day of the month (1-31)
|
||||||
|
# month (1-12)
|
||||||
|
# day of the week (0-6)
|
||||||
|
# nightly build at 15:50 UTC time every day
|
||||||
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: test_onnx_export-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test_onnx_export:
|
||||||
|
if: github.event.label.name == 'ready' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
os: [ubuntu-latest]
|
||||||
|
python-version: [3.8]
|
||||||
|
fail-fast: false
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Setup Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
cache: 'pip'
|
||||||
|
cache-dependency-path: '**/requirements-ci.txt'
|
||||||
|
|
||||||
|
- name: Install Python dependencies
|
||||||
|
run: |
|
||||||
|
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
||||||
|
pip uninstall -y protobuf
|
||||||
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
|
- name: Cache kaldifeat
|
||||||
|
id: my-cache
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/tmp/kaldifeat
|
||||||
|
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
|
||||||
|
|
||||||
|
- name: Install kaldifeat
|
||||||
|
if: steps.my-cache.outputs.cache-hit != 'true'
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
.github/scripts/install-kaldifeat.sh
|
||||||
|
|
||||||
|
- name: Test ONNX export
|
||||||
|
shell: bash
|
||||||
|
env:
|
||||||
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
run: |
|
||||||
|
export PYTHONPATH=$PWD:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
||||||
|
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
||||||
|
|
||||||
|
.github/scripts/test-onnx-export.sh
|
||||||
67
.github/workflows/test.yml
vendored
@ -21,26 +21,23 @@ on:
|
|||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
branches:
|
||||||
|
- master
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: test-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
if: github.event.label.name == 'ready' || github.event_name == 'push'
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
# os: [ubuntu-18.04, macos-10.15]
|
os: [ubuntu-latest]
|
||||||
# disable macOS test for now.
|
python-version: ["3.8"]
|
||||||
os: [ubuntu-18.04]
|
torch: ["1.10.0"]
|
||||||
python-version: [3.7, 3.8]
|
torchaudio: ["0.10.0"]
|
||||||
torch: ["1.8.0", "1.11.0"]
|
k2-version: ["1.23.2.dev20221201"]
|
||||||
torchaudio: ["0.8.0", "0.11.0"]
|
|
||||||
k2-version: ["1.15.1.dev20220427"]
|
|
||||||
exclude:
|
|
||||||
- torch: "1.8.0"
|
|
||||||
torchaudio: "0.11.0"
|
|
||||||
- torch: "1.11.0"
|
|
||||||
torchaudio: "0.8.0"
|
|
||||||
|
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
|
|
||||||
@ -59,7 +56,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
sudo apt update
|
sudo apt update
|
||||||
sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
|
sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
|
||||||
sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
|
sudo apt install -q -y --fix-missing libsox-dev libsox-fmt-all
|
||||||
|
|
||||||
- name: Install Python dependencies
|
- name: Install Python dependencies
|
||||||
run: |
|
run: |
|
||||||
@ -67,21 +64,16 @@ jobs:
|
|||||||
# numpy 1.20.x does not support python 3.6
|
# numpy 1.20.x does not support python 3.6
|
||||||
pip install numpy==1.19
|
pip install numpy==1.19
|
||||||
pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||||
if [[ ${{ matrix.torchaudio }} == "0.11.0" ]]; then
|
pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||||
pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
|
||||||
else
|
|
||||||
pip install torchaudio==${{ matrix.torchaudio }}
|
|
||||||
fi
|
|
||||||
|
|
||||||
pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/
|
pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/
|
||||||
pip install git+https://github.com/lhotse-speech/lhotse
|
pip install git+https://github.com/lhotse-speech/lhotse
|
||||||
# icefall requirements
|
# icefall requirements
|
||||||
pip uninstall -y protobuf
|
pip uninstall -y protobuf
|
||||||
pip install --no-binary protobuf protobuf
|
pip install --no-binary protobuf protobuf==3.20.*
|
||||||
|
|
||||||
pip install kaldifst
|
pip install kaldifst
|
||||||
pip install onnxruntime
|
pip install onnxruntime
|
||||||
|
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
- name: Install graphviz
|
- name: Install graphviz
|
||||||
@ -121,19 +113,20 @@ jobs:
|
|||||||
cd ../pruned_transducer_stateless4
|
cd ../pruned_transducer_stateless4
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
|
|
||||||
|
cd ../pruned_transducer_stateless7
|
||||||
|
pytest -v -s
|
||||||
|
|
||||||
cd ../transducer_stateless
|
cd ../transducer_stateless
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
|
|
||||||
if [[ ${{ matrix.torchaudio }} == "0.10.0" ]]; then
|
# cd ../transducer
|
||||||
cd ../transducer
|
# pytest -v -s
|
||||||
pytest -v -s
|
|
||||||
|
|
||||||
cd ../transducer_stateless2
|
cd ../transducer_stateless2
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
|
|
||||||
cd ../transducer_lstm
|
cd ../transducer_lstm
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
if: startsWith(matrix.os, 'macos')
|
if: startsWith(matrix.os, 'macos')
|
||||||
@ -164,13 +157,11 @@ jobs:
|
|||||||
cd ../transducer_stateless
|
cd ../transducer_stateless
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
|
|
||||||
if [[ ${{ matrix.torchaudio }} == "0.10.0" ]]; then
|
# cd ../transducer
|
||||||
cd ../transducer
|
# pytest -v -s
|
||||||
pytest -v -s
|
|
||||||
|
|
||||||
cd ../transducer_stateless2
|
cd ../transducer_stateless2
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
|
|
||||||
cd ../transducer_lstm
|
cd ../transducer_lstm
|
||||||
pytest -v -s
|
pytest -v -s
|
||||||
fi
|
|
||||||
|
|||||||
21
.gitignore
vendored
@ -11,5 +11,26 @@ log
|
|||||||
*.bak
|
*.bak
|
||||||
*-bak
|
*-bak
|
||||||
*bak.py
|
*bak.py
|
||||||
|
|
||||||
|
# Ignore Mac system files
|
||||||
|
.DS_store
|
||||||
|
|
||||||
|
# Ignore node_modules folder
|
||||||
|
node_modules
|
||||||
|
|
||||||
|
# ignore .nfs
|
||||||
|
|
||||||
|
.nfs*
|
||||||
|
|
||||||
|
# Ignore all text files
|
||||||
|
*.txt
|
||||||
|
|
||||||
|
# Ignore files related to API keys
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Ignore SASS config files
|
||||||
|
.sass-cache
|
||||||
|
|
||||||
*.param
|
*.param
|
||||||
*.bin
|
*.bin
|
||||||
|
.DS_Store
|
||||||
|
|||||||
@ -1,26 +1,38 @@
|
|||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/psf/black
|
- repo: https://github.com/psf/black
|
||||||
rev: 21.6b0
|
rev: 22.3.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
args: [--line-length=80]
|
args: ["--line-length=88"]
|
||||||
additional_dependencies: ['click==8.0.1']
|
additional_dependencies: ['click==8.1.0']
|
||||||
exclude: icefall\/__init__\.py
|
exclude: icefall\/__init__\.py
|
||||||
|
|
||||||
- repo: https://github.com/PyCQA/flake8
|
- repo: https://github.com/PyCQA/flake8
|
||||||
rev: 3.9.2
|
rev: 5.0.4
|
||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
args: [--max-line-length=80]
|
args: ["--max-line-length=88", "--extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503"]
|
||||||
|
|
||||||
|
# What are we ignoring here?
|
||||||
|
# E203: whitespace before ':'
|
||||||
|
# E266: too many leading '#' for block comment
|
||||||
|
# E501: line too long
|
||||||
|
# F401: module imported but unused
|
||||||
|
# E402: module level import not at top of file
|
||||||
|
# F403: 'from module import *' used; unable to detect undefined names
|
||||||
|
# F841: local variable is assigned to but never used
|
||||||
|
# W503: line break before binary operator
|
||||||
|
# In addition, the default ignore list is:
|
||||||
|
# E121,E123,E126,E226,E24,E704,W503,W504
|
||||||
|
|
||||||
- repo: https://github.com/pycqa/isort
|
- repo: https://github.com/pycqa/isort
|
||||||
rev: 5.9.2
|
rev: 5.10.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: isort
|
- id: isort
|
||||||
args: [--profile=black, --line-length=80]
|
args: ["--profile=black"]
|
||||||
|
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v4.0.1
|
rev: v4.2.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: check-executables-have-shebangs
|
- id: check-executables-have-shebangs
|
||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
|
|||||||
9
LICENSE
@ -1,13 +1,4 @@
|
|||||||
|
|
||||||
Legal Notices
|
|
||||||
|
|
||||||
NOTE (this is not from the Apache License): The copyright model is that
|
|
||||||
authors (or their employers, if noted in individual files) own their
|
|
||||||
individual contributions. The authors' contributions can be discerned
|
|
||||||
from the git history.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------
|
|
||||||
|
|
||||||
Apache License
|
Apache License
|
||||||
Version 2.0, January 2004
|
Version 2.0, January 2004
|
||||||
http://www.apache.org/licenses/
|
http://www.apache.org/licenses/
|
||||||
|
|||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
2 sets of configuration are provided - (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8, and (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
|
2 sets of configuration are provided - (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8, and (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
|
||||||
|
|
||||||
If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8.
|
If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8.
|
||||||
|
|
||||||
Otherwise, since the older PyTorch images are not updated with the [apt-key rotation by NVIDIA](https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key), you have to go for case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8. Ensure that your NVIDIA driver supports at least CUDA 11.0.
|
Otherwise, since the older PyTorch images are not updated with the [apt-key rotation by NVIDIA](https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key), you have to go for case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8. Ensure that your NVIDIA driver supports at least CUDA 11.0.
|
||||||
|
|
||||||
@ -10,7 +10,7 @@ You can check the highest CUDA version within your NVIDIA driver's support with
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ nvidia-smi
|
$ nvidia-smi
|
||||||
Tue Sep 20 00:26:13 2022
|
Tue Sep 20 00:26:13 2022
|
||||||
+-----------------------------------------------------------------------------+
|
+-----------------------------------------------------------------------------+
|
||||||
| NVIDIA-SMI 450.119.03 Driver Version: 450.119.03 CUDA Version: 11.0 |
|
| NVIDIA-SMI 450.119.03 Driver Version: 450.119.03 CUDA Version: 11.0 |
|
||||||
|-------------------------------+----------------------+----------------------+
|
|-------------------------------+----------------------+----------------------+
|
||||||
@ -26,7 +26,7 @@ Tue Sep 20 00:26:13 2022
|
|||||||
| 41% 30C P8 11W / 280W | 6MiB / 24220MiB | 0% Default |
|
| 41% 30C P8 11W / 280W | 6MiB / 24220MiB | 0% Default |
|
||||||
| | | N/A |
|
| | | N/A |
|
||||||
+-------------------------------+----------------------+----------------------+
|
+-------------------------------+----------------------+----------------------+
|
||||||
|
|
||||||
+-----------------------------------------------------------------------------+
|
+-----------------------------------------------------------------------------+
|
||||||
| Processes: |
|
| Processes: |
|
||||||
| GPU GI CI PID Type Process name GPU Memory |
|
| GPU GI CI PID Type Process name GPU Memory |
|
||||||
@ -40,15 +40,15 @@ Tue Sep 20 00:26:13 2022
|
|||||||
```
|
```
|
||||||
|
|
||||||
## Building images locally
|
## Building images locally
|
||||||
If your environment requires a proxy to access the Internet, remember to add that information to the Dockerfile directly.
|
If your environment requires a proxy to access the Internet, remember to add that information to the Dockerfile directly.
|
||||||
For most cases, you can uncomment these lines in the Dockerfile and add in your proxy details.
|
For most cases, you can uncomment these lines in the Dockerfile and add in your proxy details.
|
||||||
|
|
||||||
```dockerfile
|
```dockerfile
|
||||||
ENV http_proxy=http://aaa.bb.cc.net:8080 \
|
ENV http_proxy=http://aaa.bb.cc.net:8080 \
|
||||||
https_proxy=http://aaa.bb.cc.net:8080
|
https_proxy=http://aaa.bb.cc.net:8080
|
||||||
```
|
```
|
||||||
|
|
||||||
Then, proceed with these commands.
|
Then, proceed with these commands.
|
||||||
|
|
||||||
### If you are case (a), i.e. your NVIDIA driver supports CUDA version >= 11.3:
|
### If you are case (a), i.e. your NVIDIA driver supports CUDA version >= 11.3:
|
||||||
|
|
||||||
@ -72,11 +72,11 @@ docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all icefall
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Tips:
|
### Tips:
|
||||||
1. Since your data and models most probably won't be inside the Docker container, you must use the -v flag to mount them from the host machine. Do this by specifying `-v {/path/in/host/machine}:{/path/in/docker}`.
|
1. Since your data and models most probably won't be inside the Docker container, you must use the -v flag to mount them from the host machine. Do this by specifying `-v {/path/in/host/machine}:{/path/in/docker}`.
|
||||||
|
|
||||||
2. Also, if your environment requires a proxy, this would be a good time to add it in too: `-e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080`.
|
2. Also, if your environment requires a proxy, this would be a good time to add it in too: `-e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080`.
|
||||||
|
|
||||||
Overall, your docker run command should look like this.
|
Overall, your docker run command should look like this.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all -v {/path/in/host/machine}:{/path/in/docker} -e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080 icefall/pytorch1.12.1
|
docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all -v {/path/in/host/machine}:{/path/in/docker} -e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080 icefall/pytorch1.12.1
|
||||||
@ -86,9 +86,9 @@ You can explore more docker run options [here](https://docs.docker.com/engine/re
|
|||||||
|
|
||||||
### Linking to icefall in your host machine
|
### Linking to icefall in your host machine
|
||||||
|
|
||||||
If you already have icefall downloaded onto your host machine, you can use that repository instead so that changes in your code are visible inside and outside of the container.
|
If you already have icefall downloaded onto your host machine, you can use that repository instead so that changes in your code are visible inside and outside of the container.
|
||||||
|
|
||||||
Note: Remember to set the -v flag above during the first run of the container, as that is the only way for your container to access your host machine.
|
Note: Remember to set the -v flag above during the first run of the container, as that is the only way for your container to access your host machine.
|
||||||
Warning: Check that the icefall in your host machine is visible from within your container before proceeding to the commands below.
|
Warning: Check that the icefall in your host machine is visible from within your container before proceeding to the commands below.
|
||||||
|
|
||||||
Use these commands once you are inside the container.
|
Use these commands once you are inside the container.
|
||||||
@ -103,7 +103,7 @@ ln -s {/path/in/docker/to/icefall} /workspace/icefall
|
|||||||
docker exec -it icefall /bin/bash
|
docker exec -it icefall /bin/bash
|
||||||
```
|
```
|
||||||
|
|
||||||
## Restarting a killed container that has been run before.
|
## Restarting a killed container that has been run before.
|
||||||
```bash
|
```bash
|
||||||
docker start -ai icefall
|
docker start -ai icefall
|
||||||
```
|
```
|
||||||
@ -111,4 +111,4 @@ docker start -ai icefall
|
|||||||
## Sample usage of the CPU based images:
|
## Sample usage of the CPU based images:
|
||||||
```bash
|
```bash
|
||||||
docker run -it icefall /bin/bash
|
docker run -it icefall /bin/bash
|
||||||
```
|
```
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
|
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
|
||||||
|
|
||||||
# ENV http_proxy=http://aaa.bbb.cc.net:8080 \
|
# ENV http_proxy=http://aaa.bbb.cc.net:8080 \
|
||||||
# https_proxy=http://aaa.bbb.cc.net:8080
|
# https_proxy=http://aaa.bbb.cc.net:8080
|
||||||
|
|
||||||
# install normal source
|
# install normal source
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
@ -38,10 +38,10 @@ RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
|
|||||||
rm -rf cmake-3.18.0.tar.gz && \
|
rm -rf cmake-3.18.0.tar.gz && \
|
||||||
find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
# flac
|
# flac
|
||||||
RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
|
RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
|
||||||
cd /opt && \
|
cd /opt && \
|
||||||
xz -d flac-1.3.2.tar.xz && \
|
xz -d flac-1.3.2.tar.xz && \
|
||||||
tar -xvf flac-1.3.2.tar && \
|
tar -xvf flac-1.3.2.tar && \
|
||||||
cd flac-1.3.2 && \
|
cd flac-1.3.2 && \
|
||||||
@ -49,11 +49,11 @@ RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz &&
|
|||||||
make && make install && \
|
make && make install && \
|
||||||
rm -rf flac-1.3.2.tar && \
|
rm -rf flac-1.3.2.tar && \
|
||||||
find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
RUN conda install -y -c pytorch torchaudio=0.12 && \
|
RUN conda install -y -c pytorch torchaudio=0.12 && \
|
||||||
pip install graphviz
|
pip install graphviz
|
||||||
|
|
||||||
|
|
||||||
#install k2 from source
|
#install k2 from source
|
||||||
RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
|
RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
|
||||||
@ -68,6 +68,7 @@ RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
|||||||
cd /workspace/icefall && \
|
cd /workspace/icefall && \
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
RUN pip install kaldifeat
|
||||||
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
||||||
|
|
||||||
WORKDIR /workspace/icefall
|
WORKDIR /workspace/icefall
|
||||||
|
|||||||
@ -1,12 +1,12 @@
|
|||||||
FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel
|
FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel
|
||||||
|
|
||||||
# ENV http_proxy=http://aaa.bbb.cc.net:8080 \
|
# ENV http_proxy=http://aaa.bbb.cc.net:8080 \
|
||||||
# https_proxy=http://aaa.bbb.cc.net:8080
|
# https_proxy=http://aaa.bbb.cc.net:8080
|
||||||
|
|
||||||
RUN rm /etc/apt/sources.list.d/cuda.list && \
|
RUN rm /etc/apt/sources.list.d/cuda.list && \
|
||||||
rm /etc/apt/sources.list.d/nvidia-ml.list && \
|
rm /etc/apt/sources.list.d/nvidia-ml.list && \
|
||||||
apt-key del 7fa2af80
|
apt-key del 7fa2af80
|
||||||
|
|
||||||
# install normal source
|
# install normal source
|
||||||
RUN apt-get update && \
|
RUN apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
@ -36,7 +36,7 @@ RUN curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu18
|
|||||||
curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
|
curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
|
||||||
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
|
echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
|
||||||
echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
|
echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
|
||||||
rm -rf /var/lib/apt/lists/* && \
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
mv /opt/conda/lib/libcufft.so.10 /opt/libcufft.so.10.bak && \
|
mv /opt/conda/lib/libcufft.so.10 /opt/libcufft.so.10.bak && \
|
||||||
mv /opt/conda/lib/libcurand.so.10 /opt/libcurand.so.10.bak && \
|
mv /opt/conda/lib/libcurand.so.10 /opt/libcurand.so.10.bak && \
|
||||||
mv /opt/conda/lib/libcublas.so.11 /opt/libcublas.so.11.bak && \
|
mv /opt/conda/lib/libcublas.so.11 /opt/libcublas.so.11.bak && \
|
||||||
@ -56,10 +56,10 @@ RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
|
|||||||
rm -rf cmake-3.18.0.tar.gz && \
|
rm -rf cmake-3.18.0.tar.gz && \
|
||||||
find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
# flac
|
# flac
|
||||||
RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
|
RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
|
||||||
cd /opt && \
|
cd /opt && \
|
||||||
xz -d flac-1.3.2.tar.xz && \
|
xz -d flac-1.3.2.tar.xz && \
|
||||||
tar -xvf flac-1.3.2.tar && \
|
tar -xvf flac-1.3.2.tar && \
|
||||||
cd flac-1.3.2 && \
|
cd flac-1.3.2 && \
|
||||||
@ -67,7 +67,7 @@ RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz &&
|
|||||||
make && make install && \
|
make && make install && \
|
||||||
rm -rf flac-1.3.2.tar && \
|
rm -rf flac-1.3.2.tar && \
|
||||||
find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
RUN conda install -y -c pytorch torchaudio=0.7.1 && \
|
RUN conda install -y -c pytorch torchaudio=0.7.1 && \
|
||||||
pip install graphviz
|
pip install graphviz
|
||||||
@ -79,7 +79,7 @@ RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
|
|||||||
cd -
|
cd -
|
||||||
|
|
||||||
# install lhotse
|
# install lhotse
|
||||||
RUN pip install git+https://github.com/lhotse-speech/lhotse
|
RUN pip install git+https://github.com/lhotse-speech/lhotse
|
||||||
|
|
||||||
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
||||||
cd /workspace/icefall && \
|
cd /workspace/icefall && \
|
||||||
@ -88,4 +88,3 @@ RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
|||||||
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
||||||
|
|
||||||
WORKDIR /workspace/icefall
|
WORKDIR /workspace/icefall
|
||||||
|
|
||||||
|
|||||||
24
docs/README.md
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd /path/to/icefall/docs
|
||||||
|
pip install -r requirements.txt
|
||||||
|
make clean
|
||||||
|
make html
|
||||||
|
cd build/html
|
||||||
|
python3 -m http.server 8000
|
||||||
|
```
|
||||||
|
|
||||||
|
It prints:
|
||||||
|
|
||||||
|
```
|
||||||
|
Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
|
||||||
|
```
|
||||||
|
|
||||||
|
Open your browser and go to <http://0.0.0.0:8000/> to view the generated
|
||||||
|
documentation.
|
||||||
|
|
||||||
|
Done!
|
||||||
|
|
||||||
|
**Hint**: You can change the port number when starting the server.
|
||||||
@ -78,3 +78,15 @@ html_context = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
todo_include_todos = True
|
todo_include_todos = True
|
||||||
|
|
||||||
|
rst_epilog = """
|
||||||
|
.. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn
|
||||||
|
.. _sherpa-onnx: https://github.com/k2-fsa/sherpa-onnx
|
||||||
|
.. _icefall: https://github.com/k2-fsa/icefall
|
||||||
|
.. _git-lfs: https://git-lfs.com/
|
||||||
|
.. _ncnn: https://github.com/tencent/ncnn
|
||||||
|
.. _LibriSpeech: https://www.openslr.org/12
|
||||||
|
.. _musan: http://www.openslr.org/17/
|
||||||
|
.. _ONNX: https://github.com/onnx/onnx
|
||||||
|
.. _onnxruntime: https://github.com/microsoft/onnxruntime
|
||||||
|
"""
|
||||||
|
|||||||
@ -11,9 +11,9 @@ We use the following tools to make the code style to be as consistent as possibl
|
|||||||
|
|
||||||
The following versions of the above tools are used:
|
The following versions of the above tools are used:
|
||||||
|
|
||||||
- ``black == 12.6b0``
|
- ``black == 22.3.0``
|
||||||
- ``flake8 == 3.9.2``
|
- ``flake8 == 5.0.4``
|
||||||
- ``isort == 5.9.2``
|
- ``isort == 5.10.1``
|
||||||
|
|
||||||
After running the following commands:
|
After running the following commands:
|
||||||
|
|
||||||
@ -54,10 +54,17 @@ it should succeed this time:
|
|||||||
If you want to check the style of your code before ``git commit``, you
|
If you want to check the style of your code before ``git commit``, you
|
||||||
can do the following:
|
can do the following:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ pre-commit install
|
||||||
|
$ pre-commit run
|
||||||
|
|
||||||
|
Or without installing the pre-commit hooks:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
$ cd icefall
|
$ cd icefall
|
||||||
$ pip install black==21.6b0 flake8==3.9.2 isort==5.9.2
|
$ pip install black==22.3.0 flake8==5.0.4 isort==5.10.1
|
||||||
$ black --check your_changed_file.py
|
$ black --check your_changed_file.py
|
||||||
$ black your_changed_file.py # modify it in-place
|
$ black your_changed_file.py # modify it in-place
|
||||||
$
|
$
|
||||||
|
|||||||
107
docs/source/faqs.rst
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
Frequently Asked Questions (FAQs)
|
||||||
|
=================================
|
||||||
|
|
||||||
|
In this section, we collect issues reported by users and post the corresponding
|
||||||
|
solutions.
|
||||||
|
|
||||||
|
|
||||||
|
OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
|
||||||
|
-----------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
One user is using the following code to install ``torch`` and ``torchaudio``:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip install \
|
||||||
|
torch==1.10.0+cu111 \
|
||||||
|
torchvision==0.11.0+cu111 \
|
||||||
|
torchaudio==0.10.0 \
|
||||||
|
-f https://download.pytorch.org/whl/torch_stable.html
|
||||||
|
|
||||||
|
and it throws the following error when running ``tdnn/train.py``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
|
||||||
|
|
||||||
|
The fix is to specify the CUDA version while installing ``torchaudio``. That
|
||||||
|
is, change ``torchaudio==0.10.0`` to ``torchaudio==0.10.0+cu111``. Therefore,
|
||||||
|
the correct command is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip install \
|
||||||
|
torch==1.10.0+cu111 \
|
||||||
|
torchvision==0.11.0+cu111 \
|
||||||
|
torchaudio==0.10.0+cu111 \
|
||||||
|
-f https://download.pytorch.org/whl/torch_stable.html
|
||||||
|
|
||||||
|
AttributeError: module 'distutils' has no attribute 'version'
|
||||||
|
-------------------------------------------------------------
|
||||||
|
|
||||||
|
The error log is:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "./tdnn/train.py", line 14, in <module>
|
||||||
|
from asr_datamodule import YesNoAsrDataModule
|
||||||
|
File "/home/xxx/code/next-gen-kaldi/icefall/egs/yesno/ASR/tdnn/asr_datamodule.py", line 34, in <module>
|
||||||
|
from icefall.dataset.datamodule import DataModule
|
||||||
|
File "/home/xxx/code/next-gen-kaldi/icefall/icefall/__init__.py", line 3, in <module>
|
||||||
|
from . import (
|
||||||
|
File "/home/xxx/code/next-gen-kaldi/icefall/icefall/decode.py", line 23, in <module>
|
||||||
|
from icefall.utils import add_eos, add_sos, get_texts
|
||||||
|
File "/home/xxx/code/next-gen-kaldi/icefall/icefall/utils.py", line 39, in <module>
|
||||||
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
|
File "/home/xxx/tool/miniconda3/envs/yyy/lib/python3.8/site-packages/torch/utils/tensorboard/__init__.py", line 4, in <module>
|
||||||
|
LooseVersion = distutils.version.LooseVersion
|
||||||
|
AttributeError: module 'distutils' has no attribute 'version'
|
||||||
|
|
||||||
|
The fix is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
pip uninstall setuptools
|
||||||
|
|
||||||
|
pip install setuptools==58.0.4
|
||||||
|
|
||||||
|
ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
|
||||||
|
--------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
If you are using ``conda`` and encounter the following issue:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 24, in <module>
|
||||||
|
from _k2 import DeterminizeWeightPushingType
|
||||||
|
ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
|
||||||
|
|
||||||
|
During handling of the above exception, another exception occurred:
|
||||||
|
|
||||||
|
Traceback (most recent call last):
|
||||||
|
File "/k2-dev/yangyifan/icefall/egs/librispeech/ASR/./pruned_transducer_stateless7_ctc_bs/decode.py", line 104, in <module>
|
||||||
|
import k2
|
||||||
|
File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 30, in <module>
|
||||||
|
raise ImportError(
|
||||||
|
ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
|
||||||
|
Note: If you're using anaconda and importing k2 on MacOS,
|
||||||
|
you can probably fix this by setting the environment variable:
|
||||||
|
export DYLD_LIBRARY_PATH=$CONDA_PREFIX/lib/python3.10/site-packages:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
Please first try to find where ``libpython3.10.so.1.0`` is located.
|
||||||
|
|
||||||
|
For instance,
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd $CONDA_PREFIX/lib
|
||||||
|
find . -name "libpython*"
|
||||||
|
|
||||||
|
If you are able to find it inside ``$CONDA_PREFIX/lib``, please set the
|
||||||
|
following environment variable:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
|
||||||
@ -21,7 +21,16 @@ speech recognition recipes using `k2 <https://github.com/k2-fsa/k2>`_.
|
|||||||
:caption: Contents:
|
:caption: Contents:
|
||||||
|
|
||||||
installation/index
|
installation/index
|
||||||
|
faqs
|
||||||
model-export/index
|
model-export/index
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 3
|
||||||
|
|
||||||
recipes/index
|
recipes/index
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
contributing/index
|
contributing/index
|
||||||
huggingface/index
|
huggingface/index
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: >= v1.9"><title>k2: >= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= v1.9</text></g></svg>
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: >= v1.9"><title>k2: >= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= v1.9</text></g></svg>
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
@ -1 +1 @@
|
|||||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: >= 3.6"><title>python: >= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">>= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">>= 3.6</text></g></svg>
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: >= 3.6"><title>python: >= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">>= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">>= 3.6</text></g></svg>
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
@ -1 +1 @@
|
|||||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100" height="20" role="img" aria-label="torch: >= 1.6.0"><title>torch: >= 1.6.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="100" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="39" height="20" fill="#555"/><rect x="39" width="61" height="20" fill="#97ca00"/><rect width="100" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="205" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="290">torch</text><text x="205" y="140" transform="scale(.1)" fill="#fff" textLength="290">torch</text><text aria-hidden="true" x="685" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">>= 1.6.0</text><text x="685" y="140" transform="scale(.1)" fill="#fff" textLength="510">>= 1.6.0</text></g></svg>
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100" height="20" role="img" aria-label="torch: >= 1.6.0"><title>torch: >= 1.6.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="100" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="39" height="20" fill="#555"/><rect x="39" width="61" height="20" fill="#97ca00"/><rect width="100" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="205" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="290">torch</text><text x="205" y="140" transform="scale(.1)" fill="#fff" textLength="290">torch</text><text aria-hidden="true" x="685" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">>= 1.6.0</text><text x="685" y="140" transform="scale(.1)" fill="#fff" textLength="510">>= 1.6.0</text></g></svg>
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB |
@ -393,6 +393,17 @@ Now let us run the training part:
|
|||||||
We use ``export CUDA_VISIBLE_DEVICES=""`` so that ``icefall`` uses CPU
|
We use ``export CUDA_VISIBLE_DEVICES=""`` so that ``icefall`` uses CPU
|
||||||
even if there are GPUs available.
|
even if there are GPUs available.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
In case you get a ``Segmentation fault (core dumped)`` error, please use:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
|
||||||
|
|
||||||
|
See more at `<https://github.com/k2-fsa/icefall/issues/674>`_ if you are
|
||||||
|
interested.
|
||||||
|
|
||||||
The training log is given below:
|
The training log is given below:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|||||||
@ -0,0 +1,21 @@
|
|||||||
|
2023-01-11 12:15:38,677 INFO [export-for-ncnn.py:220] device: cpu
|
||||||
|
2023-01-11 12:15:38,681 INFO [export-for-ncnn.py:229] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_v
|
||||||
|
alid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampl
|
||||||
|
ing_factor': 4, 'decoder_dim': 512, 'joiner_dim': 512, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.2', 'k2-build-type':
|
||||||
|
'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'a34171ed85605b0926eebbd0463d059431f4f74a', 'k2-git-date': 'Wed Dec 14 00:06:38 2022',
|
||||||
|
'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False, 'torch-cuda-vers
|
||||||
|
ion': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'fix-stateless3-train-2022-12-27', 'icefall-git-sha1': '530e8a1-dirty', '
|
||||||
|
icefall-git-date': 'Tue Dec 27 13:59:18 2022', 'icefall-path': '/star-fj/fangjun/open-source/icefall', 'k2-path': '/star-fj/fangjun/op
|
||||||
|
en-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279
|
||||||
|
-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '127.0.0.1'}, 'epoch': 30, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefa
|
||||||
|
ll-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp'), 'bpe_model': './icefall-asr-librispeech-conv-emformer-transdu
|
||||||
|
cer-stateless2-2022-07-05//data/lang_bpe_500/bpe.model', 'jit': False, 'context_size': 2, 'use_averaged_model': False, 'encoder_dim':
|
||||||
|
512, 'nhead': 8, 'dim_feedforward': 2048, 'num_encoder_layers': 12, 'cnn_module_kernel': 31, 'left_context_length': 32, 'chunk_length'
|
||||||
|
: 32, 'right_context_length': 8, 'memory_size': 32, 'blank_id': 0, 'vocab_size': 500}
|
||||||
|
2023-01-11 12:15:38,681 INFO [export-for-ncnn.py:231] About to create model
|
||||||
|
2023-01-11 12:15:40,053 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-conv-emformer-transducer-stateless2-2
|
||||||
|
022-07-05/exp/epoch-30.pt
|
||||||
|
2023-01-11 12:15:40,708 INFO [export-for-ncnn.py:315] Number of model parameters: 75490012
|
||||||
|
2023-01-11 12:15:41,681 INFO [export-for-ncnn.py:318] Using torch.jit.trace()
|
||||||
|
2023-01-11 12:15:41,681 INFO [export-for-ncnn.py:320] Exporting encoder
|
||||||
|
2023-01-11 12:15:41,682 INFO [export-for-ncnn.py:149] chunk_length: 32, right_context_length: 8
|
||||||
@ -0,0 +1,18 @@
|
|||||||
|
2023-02-17 11:22:42,862 INFO [export-for-ncnn.py:222] device: cpu
|
||||||
|
2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:231] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'dim_feedforward': 2048, 'decoder_dim': 512, 'joiner_dim': 512, 'is_pnnx': False, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-dirty', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp'), 'bpe_model': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': 12, 'encoder_dim': 512, 'rnn_hidden_size': 1024, 'aux_layer_period': 0, 'blank_id': 0, 'vocab_size': 500}
|
||||||
|
2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:235] About to create model
|
||||||
|
2023-02-17 11:22:43,239 INFO [train.py:472] Disable giga
|
||||||
|
2023-02-17 11:22:43,249 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/epoch-99.pt
|
||||||
|
2023-02-17 11:22:44,595 INFO [export-for-ncnn.py:324] encoder parameters: 83137520
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:325] decoder parameters: 257024
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:326] joiner parameters: 781812
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:327] total parameters: 84176356
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:329] Using torch.jit.trace()
|
||||||
|
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:331] Exporting encoder
|
||||||
|
2023-02-17 11:22:48,182 INFO [export-for-ncnn.py:158] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
2023-02-17 11:22:48,183 INFO [export-for-ncnn.py:335] Exporting decoder
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/lstm_transducer_stateless2/decoder.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
need_pad = bool(need_pad)
|
||||||
|
2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:180] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:339] Exporting joiner
|
||||||
|
2023-02-17 11:22:48,304 INFO [export-for-ncnn.py:207] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
|
||||||
@ -0,0 +1,74 @@
|
|||||||
|
2023-02-27 20:23:07,473 INFO [export-for-ncnn.py:246] device: cpu
|
||||||
|
2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:255] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-clean', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp'), 'bpe_model': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 32, 'blank_id': 0, 'vocab_size': 500}
|
||||||
|
2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:257] About to create model
|
||||||
|
2023-02-27 20:23:08,023 INFO [zipformer2.py:419] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
|
||||||
|
2023-02-27 20:23:08,037 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/epoch-99.pt
|
||||||
|
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:346] encoder parameters: 68944004
|
||||||
|
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:347] decoder parameters: 260096
|
||||||
|
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:348] joiner parameters: 716276
|
||||||
|
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:349] total parameters: 69920376
|
||||||
|
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:351] Using torch.jit.trace()
|
||||||
|
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:353] Exporting encoder
|
||||||
|
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:174] decode_chunk_len: 32
|
||||||
|
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:175] T: 39
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1344: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_len.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_avg.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1352: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_key.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1356: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_val.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1360: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_val2.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1364: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_conv1.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1368: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_conv2.size(0) == self.num_layers, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1373: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert self.left_context_len == cached_key.shape[1], (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1884: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert self.x_size == x.size(0), (self.x_size, x.size(0))
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2442: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_key.shape[0] == self.left_context_len, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2449: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_key.shape[0] == cached_val.shape[0], (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2469: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_key.shape[0] == left_context_len, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2473: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_val.shape[0] == left_context_len, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2483: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert kv_len == k.shape[0], (kv_len, k.shape)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2570: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert list(attn_output.size()) == [bsz * num_heads, seq_len, head_dim // 2]
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2926: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cache.shape == (x.size(0), x.size(1), self.lorder), (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2652: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert x.shape[0] == self.x_size, (x.shape[0], self.x_size)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2653: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert x.shape[2] == self.embed_dim, (x.shape[2], self.embed_dim)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2666: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert cached_val.shape[0] == self.left_context_len, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1543: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert src.shape[0] == self.in_x_size, (src.shape[0], self.in_x_size)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1637: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert src.shape[0] == self.in_x_size, (
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1643: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert src.shape[2] == self.in_channels, (src.shape[2], self.in_channels)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1571: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
if src.shape[0] != self.in_x_size:
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1763: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert src1.shape[:-1] == src2.shape[:-1], (src1.shape, src2.shape)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1779: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert src1.shape[-1] == self.dim1, (src1.shape[-1], self.dim1)
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1780: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert src2.shape[-1] == self.dim2, (src2.shape[-1], self.dim2)
|
||||||
|
/star-fj/fangjun/py38/lib/python3.8/site-packages/torch/jit/_trace.py:958: TracerWarning: Encountering a list at the output of the tracer might cause the trace to be incorrect, this is only valid if the container structure does not change based on the module's inputs. Consider using a constant container instead (e.g. for `list`, use a `tuple` instead. for `dict`, use a `NamedTuple` instead). If you absolutely need this and know the side effects, pass strict=False to trace() to allow this behavior.
|
||||||
|
module._c._create_method_from_trace(
|
||||||
|
2023-02-27 20:23:19,640 INFO [export-for-ncnn.py:182] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
2023-02-27 20:23:19,646 INFO [export-for-ncnn.py:357] Exporting decoder
|
||||||
|
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decoder.py:102: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
|
||||||
|
assert embedding_out.size(-1) == self.context_size
|
||||||
|
2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:204] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:361] Exporting joiner
|
||||||
|
2023-02-27 20:23:19,735 INFO [export-for-ncnn.py:231] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt
|
||||||
@ -0,0 +1,104 @@
|
|||||||
|
Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
|
||||||
|
num encoder conv layers: 88
|
||||||
|
num joiner conv layers: 3
|
||||||
|
num files: 3
|
||||||
|
Processing ../test_wavs/1089-134686-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0002.wav
|
||||||
|
Processing ../test_wavs/1089-134686-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0002.wav
|
||||||
|
----------encoder----------
|
||||||
|
conv_87 : max = 15.942385 threshold = 15.938493 scale = 7.968131
|
||||||
|
conv_88 : max = 35.442448 threshold = 15.549335 scale = 8.167552
|
||||||
|
conv_89 : max = 23.228289 threshold = 8.001738 scale = 15.871552
|
||||||
|
linear_90 : max = 3.976146 threshold = 1.101789 scale = 115.267128
|
||||||
|
linear_91 : max = 6.962030 threshold = 5.162033 scale = 24.602713
|
||||||
|
linear_92 : max = 12.323041 threshold = 3.853959 scale = 32.953129
|
||||||
|
linear_94 : max = 6.905416 threshold = 4.648006 scale = 27.323545
|
||||||
|
linear_93 : max = 6.905416 threshold = 5.474093 scale = 23.200188
|
||||||
|
linear_95 : max = 1.888012 threshold = 1.403563 scale = 90.483986
|
||||||
|
linear_96 : max = 6.856741 threshold = 5.398679 scale = 23.524273
|
||||||
|
linear_97 : max = 9.635942 threshold = 2.613655 scale = 48.590950
|
||||||
|
linear_98 : max = 6.460340 threshold = 5.670146 scale = 22.398010
|
||||||
|
linear_99 : max = 9.532276 threshold = 2.585537 scale = 49.119396
|
||||||
|
linear_101 : max = 6.585871 threshold = 5.719224 scale = 22.205809
|
||||||
|
linear_100 : max = 6.585871 threshold = 5.751382 scale = 22.081648
|
||||||
|
linear_102 : max = 1.593344 threshold = 1.450581 scale = 87.551147
|
||||||
|
linear_103 : max = 6.592681 threshold = 5.705824 scale = 22.257959
|
||||||
|
linear_104 : max = 8.752957 threshold = 1.980955 scale = 64.110489
|
||||||
|
linear_105 : max = 6.696240 threshold = 5.877193 scale = 21.608953
|
||||||
|
linear_106 : max = 9.059659 threshold = 2.643138 scale = 48.048950
|
||||||
|
linear_108 : max = 6.975461 threshold = 4.589567 scale = 27.671457
|
||||||
|
linear_107 : max = 6.975461 threshold = 6.190381 scale = 20.515701
|
||||||
|
linear_109 : max = 3.710759 threshold = 2.305635 scale = 55.082436
|
||||||
|
linear_110 : max = 7.531228 threshold = 5.731162 scale = 22.159557
|
||||||
|
linear_111 : max = 10.528083 threshold = 2.259322 scale = 56.211544
|
||||||
|
linear_112 : max = 8.148807 threshold = 5.500842 scale = 23.087374
|
||||||
|
linear_113 : max = 8.592566 threshold = 1.948851 scale = 65.166611
|
||||||
|
linear_115 : max = 8.437109 threshold = 5.608947 scale = 22.642395
|
||||||
|
linear_114 : max = 8.437109 threshold = 6.193942 scale = 20.503904
|
||||||
|
linear_116 : max = 3.966980 threshold = 3.200896 scale = 39.676392
|
||||||
|
linear_117 : max = 9.451303 threshold = 6.061664 scale = 20.951344
|
||||||
|
linear_118 : max = 12.077262 threshold = 3.965800 scale = 32.023804
|
||||||
|
linear_119 : max = 9.671615 threshold = 4.847613 scale = 26.198460
|
||||||
|
linear_120 : max = 8.625638 threshold = 3.131427 scale = 40.556595
|
||||||
|
linear_122 : max = 10.274080 threshold = 4.888716 scale = 25.978189
|
||||||
|
linear_121 : max = 10.274080 threshold = 5.420480 scale = 23.429659
|
||||||
|
linear_123 : max = 4.826197 threshold = 3.599617 scale = 35.281532
|
||||||
|
linear_124 : max = 11.396383 threshold = 7.325849 scale = 17.335875
|
||||||
|
linear_125 : max = 9.337198 threshold = 3.941410 scale = 32.221970
|
||||||
|
linear_126 : max = 9.699965 threshold = 4.842878 scale = 26.224073
|
||||||
|
linear_127 : max = 8.775370 threshold = 3.884215 scale = 32.696438
|
||||||
|
linear_129 : max = 9.872276 threshold = 4.837319 scale = 26.254213
|
||||||
|
linear_128 : max = 9.872276 threshold = 7.180057 scale = 17.687883
|
||||||
|
linear_130 : max = 4.150427 threshold = 3.454298 scale = 36.765789
|
||||||
|
linear_131 : max = 11.112692 threshold = 7.924847 scale = 16.025545
|
||||||
|
linear_132 : max = 11.852893 threshold = 3.116593 scale = 40.749626
|
||||||
|
linear_133 : max = 11.517084 threshold = 5.024665 scale = 25.275314
|
||||||
|
linear_134 : max = 10.683807 threshold = 3.878618 scale = 32.743618
|
||||||
|
linear_136 : max = 12.421055 threshold = 6.322729 scale = 20.086264
|
||||||
|
linear_135 : max = 12.421055 threshold = 5.309880 scale = 23.917679
|
||||||
|
linear_137 : max = 4.827781 threshold = 3.744595 scale = 33.915554
|
||||||
|
linear_138 : max = 14.422395 threshold = 7.742882 scale = 16.402161
|
||||||
|
linear_139 : max = 8.527538 threshold = 3.866123 scale = 32.849449
|
||||||
|
linear_140 : max = 12.128619 threshold = 4.657793 scale = 27.266134
|
||||||
|
linear_141 : max = 9.839593 threshold = 3.845993 scale = 33.021378
|
||||||
|
linear_143 : max = 12.442304 threshold = 7.099039 scale = 17.889746
|
||||||
|
linear_142 : max = 12.442304 threshold = 5.325038 scale = 23.849592
|
||||||
|
linear_144 : max = 5.929444 threshold = 5.618206 scale = 22.605080
|
||||||
|
linear_145 : max = 13.382126 threshold = 9.321095 scale = 13.625010
|
||||||
|
linear_146 : max = 9.894987 threshold = 3.867645 scale = 32.836517
|
||||||
|
linear_147 : max = 10.915313 threshold = 4.906028 scale = 25.886522
|
||||||
|
linear_148 : max = 9.614287 threshold = 3.908151 scale = 32.496181
|
||||||
|
linear_150 : max = 11.724932 threshold = 4.485588 scale = 28.312899
|
||||||
|
linear_149 : max = 11.724932 threshold = 5.161146 scale = 24.606939
|
||||||
|
linear_151 : max = 7.164453 threshold = 5.847355 scale = 21.719223
|
||||||
|
linear_152 : max = 13.086471 threshold = 5.984121 scale = 21.222834
|
||||||
|
linear_153 : max = 11.099524 threshold = 3.991601 scale = 31.816805
|
||||||
|
linear_154 : max = 10.054585 threshold = 4.489706 scale = 28.286930
|
||||||
|
linear_155 : max = 12.389185 threshold = 3.100321 scale = 40.963501
|
||||||
|
linear_157 : max = 9.982999 threshold = 5.154796 scale = 24.637253
|
||||||
|
linear_156 : max = 9.982999 threshold = 8.537706 scale = 14.875190
|
||||||
|
linear_158 : max = 8.420287 threshold = 6.502287 scale = 19.531588
|
||||||
|
linear_159 : max = 25.014746 threshold = 9.423280 scale = 13.477261
|
||||||
|
linear_160 : max = 45.633553 threshold = 5.715335 scale = 22.220921
|
||||||
|
linear_161 : max = 20.371849 threshold = 5.117830 scale = 24.815203
|
||||||
|
linear_162 : max = 12.492933 threshold = 3.126283 scale = 40.623318
|
||||||
|
linear_164 : max = 20.697504 threshold = 4.825712 scale = 26.317358
|
||||||
|
linear_163 : max = 20.697504 threshold = 5.078367 scale = 25.008038
|
||||||
|
linear_165 : max = 9.023975 threshold = 6.836278 scale = 18.577358
|
||||||
|
linear_166 : max = 34.860619 threshold = 7.259792 scale = 17.493614
|
||||||
|
linear_167 : max = 30.380934 threshold = 5.496160 scale = 23.107042
|
||||||
|
linear_168 : max = 20.691216 threshold = 4.733317 scale = 26.831076
|
||||||
|
linear_169 : max = 9.723948 threshold = 3.952728 scale = 32.129707
|
||||||
|
linear_171 : max = 21.034811 threshold = 5.366547 scale = 23.665123
|
||||||
|
linear_170 : max = 21.034811 threshold = 5.356277 scale = 23.710501
|
||||||
|
linear_172 : max = 10.556884 threshold = 5.729481 scale = 22.166058
|
||||||
|
linear_173 : max = 20.033039 threshold = 10.207264 scale = 12.442120
|
||||||
|
linear_174 : max = 11.597379 threshold = 2.658676 scale = 47.768131
|
||||||
|
----------joiner----------
|
||||||
|
linear_2 : max = 19.293503 threshold = 14.305265 scale = 8.877850
|
||||||
|
linear_1 : max = 10.812222 threshold = 8.766452 scale = 14.487047
|
||||||
|
linear_3 : max = 0.999999 threshold = 0.999755 scale = 127.031174
|
||||||
|
ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...
|
||||||
@ -0,0 +1,44 @@
|
|||||||
|
Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
|
||||||
|
num encoder conv layers: 28
|
||||||
|
num joiner conv layers: 3
|
||||||
|
num files: 3
|
||||||
|
Processing ../test_wavs/1089-134686-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0002.wav
|
||||||
|
Processing ../test_wavs/1089-134686-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0001.wav
|
||||||
|
Processing ../test_wavs/1221-135766-0002.wav
|
||||||
|
----------encoder----------
|
||||||
|
conv_15 : max = 15.942385 threshold = 15.930708 scale = 7.972025
|
||||||
|
conv_16 : max = 44.978855 threshold = 17.031788 scale = 7.456645
|
||||||
|
conv_17 : max = 17.868437 threshold = 7.830528 scale = 16.218575
|
||||||
|
linear_18 : max = 3.107259 threshold = 1.194808 scale = 106.293236
|
||||||
|
linear_19 : max = 6.193777 threshold = 4.634748 scale = 27.401705
|
||||||
|
linear_20 : max = 9.259933 threshold = 2.606617 scale = 48.722160
|
||||||
|
linear_21 : max = 5.186600 threshold = 4.790260 scale = 26.512129
|
||||||
|
linear_22 : max = 9.759041 threshold = 2.265832 scale = 56.050053
|
||||||
|
linear_23 : max = 3.931209 threshold = 3.099090 scale = 40.979767
|
||||||
|
linear_24 : max = 10.324160 threshold = 2.215561 scale = 57.321835
|
||||||
|
linear_25 : max = 3.800708 threshold = 3.599352 scale = 35.284134
|
||||||
|
linear_26 : max = 10.492444 threshold = 3.153369 scale = 40.274391
|
||||||
|
linear_27 : max = 3.660161 threshold = 2.720994 scale = 46.674126
|
||||||
|
linear_28 : max = 9.415265 threshold = 3.174434 scale = 40.007133
|
||||||
|
linear_29 : max = 4.038418 threshold = 3.118534 scale = 40.724262
|
||||||
|
linear_30 : max = 10.072084 threshold = 3.936867 scale = 32.259155
|
||||||
|
linear_31 : max = 4.342712 threshold = 3.599489 scale = 35.282787
|
||||||
|
linear_32 : max = 11.340535 threshold = 3.120308 scale = 40.701103
|
||||||
|
linear_33 : max = 3.846987 threshold = 3.630030 scale = 34.985939
|
||||||
|
linear_34 : max = 10.686298 threshold = 2.204571 scale = 57.607586
|
||||||
|
linear_35 : max = 4.904821 threshold = 4.575518 scale = 27.756420
|
||||||
|
linear_36 : max = 11.806659 threshold = 2.585589 scale = 49.118401
|
||||||
|
linear_37 : max = 6.402340 threshold = 5.047157 scale = 25.162680
|
||||||
|
linear_38 : max = 11.174589 threshold = 1.923361 scale = 66.030258
|
||||||
|
linear_39 : max = 16.178576 threshold = 7.556058 scale = 16.807705
|
||||||
|
linear_40 : max = 12.901954 threshold = 5.301267 scale = 23.956539
|
||||||
|
linear_41 : max = 14.839805 threshold = 7.597429 scale = 16.716181
|
||||||
|
linear_42 : max = 10.178945 threshold = 2.651595 scale = 47.895699
|
||||||
|
----------joiner----------
|
||||||
|
linear_2 : max = 24.829245 threshold = 16.627592 scale = 7.637907
|
||||||
|
linear_1 : max = 10.746186 threshold = 5.255032 scale = 24.167313
|
||||||
|
linear_3 : max = 1.000000 threshold = 0.999756 scale = 127.031013
|
||||||
|
ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
2023-01-11 14:02:12,216 INFO [streaming-ncnn-decode.py:320] {'tokens': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav'}
|
||||||
|
T 51 32
|
||||||
|
2023-01-11 14:02:13,141 INFO [streaming-ncnn-decode.py:328] Constructing Fbank computer
|
||||||
|
2023-01-11 14:02:13,151 INFO [streaming-ncnn-decode.py:331] Reading sound files: ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-01-11 14:02:13,176 INFO [streaming-ncnn-decode.py:336] torch.Size([106000])
|
||||||
|
2023-01-11 14:02:17,581 INFO [streaming-ncnn-decode.py:380] ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-01-11 14:02:17,581 INFO [streaming-ncnn-decode.py:381] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
2023-02-17 11:37:30,861 INFO [streaming-ncnn-decode.py:255] {'tokens': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav'}
|
||||||
|
2023-02-17 11:37:31,425 INFO [streaming-ncnn-decode.py:263] Constructing Fbank computer
|
||||||
|
2023-02-17 11:37:31,427 INFO [streaming-ncnn-decode.py:266] Reading sound files: ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-02-17 11:37:31,431 INFO [streaming-ncnn-decode.py:271] torch.Size([106000])
|
||||||
|
2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:342] ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:343] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
2023-02-27 20:43:40,283 INFO [streaming-ncnn-decode.py:349] {'tokens': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav'}
|
||||||
|
2023-02-27 20:43:41,260 INFO [streaming-ncnn-decode.py:357] Constructing Fbank computer
|
||||||
|
2023-02-27 20:43:41,264 INFO [streaming-ncnn-decode.py:360] Reading sound files: ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-02-27 20:43:41,269 INFO [streaming-ncnn-decode.py:365] torch.Size([106000])
|
||||||
|
2023-02-27 20:43:41,280 INFO [streaming-ncnn-decode.py:372] number of states: 35
|
||||||
|
2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:410] ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
|
||||||
|
2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:411] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||||
753
docs/source/model-export/export-ncnn-conv-emformer.rst
Normal file
@ -0,0 +1,753 @@
|
|||||||
|
.. _export_conv_emformer_transducer_models_to_ncnn:
|
||||||
|
|
||||||
|
Export ConvEmformer transducer models to ncnn
|
||||||
|
=============================================
|
||||||
|
|
||||||
|
We use the pre-trained model from the following repository as an example:
|
||||||
|
|
||||||
|
- `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
|
||||||
|
|
||||||
|
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
|
||||||
|
may ``not`` work.
|
||||||
|
|
||||||
|
1. Download the pre-trained model
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
You can also refer to `<https://k2-fsa.github.io/sherpa/cpp/pretrained_models/online_transducer.html#icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_ to download the pre-trained model.
|
||||||
|
|
||||||
|
You have to install `git-lfs`_ before you continue.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
||||||
|
|
||||||
|
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
|
||||||
|
|
||||||
|
|
||||||
|
In the above code, we downloaded the pre-trained model into the directory
|
||||||
|
``egs/librispeech/ASR/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05``.
|
||||||
|
|
||||||
|
.. _export_for_ncnn_install_ncnn_and_pnnx:
|
||||||
|
|
||||||
|
2. Install ncnn and pnnx
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# We put ncnn into $HOME/open-source/ncnn
|
||||||
|
# You can change it to anywhere you like
|
||||||
|
|
||||||
|
cd $HOME
|
||||||
|
mkdir -p open-source
|
||||||
|
cd open-source
|
||||||
|
|
||||||
|
git clone https://github.com/csukuangfj/ncnn
|
||||||
|
cd ncnn
|
||||||
|
git submodule update --recursive --init
|
||||||
|
|
||||||
|
# Note: We don't use "python setup.py install" or "pip install ." here
|
||||||
|
|
||||||
|
mkdir -p build-wheel
|
||||||
|
cd build-wheel
|
||||||
|
|
||||||
|
cmake \
|
||||||
|
-DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DNCNN_PYTHON=ON \
|
||||||
|
-DNCNN_BUILD_BENCHMARK=OFF \
|
||||||
|
-DNCNN_BUILD_EXAMPLES=OFF \
|
||||||
|
-DNCNN_BUILD_TOOLS=ON \
|
||||||
|
..
|
||||||
|
|
||||||
|
make -j4
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
# Note: $PWD here is $HOME/open-source/ncnn
|
||||||
|
|
||||||
|
export PYTHONPATH=$PWD/python:$PYTHONPATH
|
||||||
|
export PATH=$PWD/tools/pnnx/build/src:$PATH
|
||||||
|
export PATH=$PWD/build-wheel/tools/quantize:$PATH
|
||||||
|
|
||||||
|
# Now build pnnx
|
||||||
|
cd tools/pnnx
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make -j4
|
||||||
|
|
||||||
|
./src/pnnx
|
||||||
|
|
||||||
|
Congratulations! You have successfully installed the following components:
|
||||||
|
|
||||||
|
- ``pnnx``, which is an executable located in
|
||||||
|
``$HOME/open-source/ncnn/tools/pnnx/build/src``. We will use
|
||||||
|
it to convert models exported by ``torch.jit.trace()``.
|
||||||
|
- ``ncnn2int8``, which is an executable located in
|
||||||
|
``$HOME/open-source/ncnn/build-wheel/tools/quantize``. We will use
|
||||||
|
it to quantize our models to ``int8``.
|
||||||
|
- ``ncnn.cpython-38-x86_64-linux-gnu.so``, which is a Python module located
|
||||||
|
in ``$HOME/open-source/ncnn/python/ncnn``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
I am using ``Python 3.8``, so it
|
||||||
|
is ``ncnn.cpython-38-x86_64-linux-gnu.so``. If you use a different
|
||||||
|
version, say, ``Python 3.9``, the name would be
|
||||||
|
``ncnn.cpython-39-x86_64-linux-gnu.so``.
|
||||||
|
|
||||||
|
Also, if you are not using Linux, the file name would be different.
|
||||||
|
But that does not matter. As long as you can compile it, it should work.
|
||||||
|
|
||||||
|
We have set up ``PYTHONPATH`` so that you can use ``import ncnn`` in your
|
||||||
|
Python code. We have also set up ``PATH`` so that you can use
|
||||||
|
``pnnx`` and ``ncnn2int8`` later in your terminal.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please don't use `<https://github.com/tencent/ncnn>`_.
|
||||||
|
We have made some modifications to the official `ncnn`_.
|
||||||
|
|
||||||
|
We will synchronize `<https://github.com/csukuangfj/ncnn>`_ periodically
|
||||||
|
with the official one.
|
||||||
|
|
||||||
|
3. Export the model via torch.jit.trace()
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
First, let us rename our pre-trained model:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp
|
||||||
|
|
||||||
|
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-30.pt
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
|
||||||
|
Next, we use the following code to export our model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
dir=./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
|
||||||
|
--exp-dir $dir/exp \
|
||||||
|
--bpe-model $dir/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 30 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
\
|
||||||
|
--num-encoder-layers 12 \
|
||||||
|
--chunk-length 32 \
|
||||||
|
--cnn-module-kernel 31 \
|
||||||
|
--left-context-length 32 \
|
||||||
|
--right-context-length 8 \
|
||||||
|
--memory-size 32 \
|
||||||
|
--encoder-dim 512
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
If your model has different configuration parameters, please change them accordingly.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We have renamed our model to ``epoch-30.pt`` so that we can use ``--epoch 30``.
|
||||||
|
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
|
||||||
|
|
||||||
|
If you have trained a model by yourself and if you have all checkpoints
|
||||||
|
available, please first use ``decode.py`` to tune ``--epoch --avg``
|
||||||
|
and select the best combination with ``--use-averaged-model 1``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You will see the following log output:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/export-conv-emformer-transducer-for-ncnn-output.txt
|
||||||
|
|
||||||
|
The log shows the model has ``75490012`` parameters, i.e., ``~75 M``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 289M Jan 11 12:05 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
|
||||||
|
|
||||||
|
You can see that the file size of the pre-trained model is ``289 MB``, which
|
||||||
|
is roughly equal to ``75490012*4/1024/1024 = 287.97 MB``.
|
||||||
|
|
||||||
|
After running ``conv_emformer_transducer_stateless2/export-for-ncnn.py``,
|
||||||
|
we will get the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*pnnx*
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1010K Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
|
||||||
|
.. _conv-emformer-step-4-export-torchscript-model-via-pnnx:
|
||||||
|
|
||||||
|
4. Export torchscript model via pnnx
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Make sure you have set up the ``PATH`` environment variable. Otherwise,
|
||||||
|
it will throw an error saying that ``pnnx`` could not be found.
|
||||||
|
|
||||||
|
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*ncnn*{bin,param}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 142M Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1.5M Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
There are two types of files:
|
||||||
|
|
||||||
|
- ``param``: It is a text file containing the model architectures. You can
|
||||||
|
use a text editor to view its content.
|
||||||
|
- ``bin``: It is a binary file containing the model parameters.
|
||||||
|
|
||||||
|
We compare the file sizes of the models below before and after converting via ``pnnx``:
|
||||||
|
|
||||||
|
.. see https://tableconvert.com/restructuredtext-generator
|
||||||
|
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+==================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 283 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin | 142 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the models after conversion are about one half
|
||||||
|
of the models before conversion:
|
||||||
|
|
||||||
|
- encoder: 283 MB vs 142 MB
|
||||||
|
- decoder: 1010 KB vs 503 KB
|
||||||
|
- joiner: 3.0 MB vs 1.5 MB
|
||||||
|
|
||||||
|
The reason is that by default ``pnnx`` converts ``float32`` parameters
|
||||||
|
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
|
||||||
|
for ``float16``. Thus, the file size is halved after conversion.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
|
||||||
|
won't convert ``float32`` to ``float16``.
|
||||||
|
|
||||||
|
5. Test the exported models in icefall
|
||||||
|
--------------------------------------
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We assume you have set up the environment variable ``PYTHONPATH`` when
|
||||||
|
building `ncnn`_.
|
||||||
|
|
||||||
|
Now we have successfully converted our pre-trained model to `ncnn`_ format.
|
||||||
|
The generated 6 files are what we need. You can use the following code to
|
||||||
|
test the converted models:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
|
||||||
|
--tokens ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
|
||||||
|
only 1 wave file as input.
|
||||||
|
|
||||||
|
The output is given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/test-streaming-ncnn-decode-conv-emformer-transducer-libri.txt
|
||||||
|
|
||||||
|
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
|
||||||
|
|
||||||
|
|
||||||
|
.. _conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn:
|
||||||
|
|
||||||
|
6. Modify the exported encoder for sherpa-ncnn
|
||||||
|
----------------------------------------------
|
||||||
|
|
||||||
|
In order to use the exported models in `sherpa-ncnn`_, we have to modify
|
||||||
|
``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1060 1342
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation** of the above three lines:
|
||||||
|
|
||||||
|
1. ``7767517``, it is a magic number and should not be changed.
|
||||||
|
2. ``1060 1342``, the first number ``1060`` specifies the number of layers
|
||||||
|
in this file, while ``1342`` specifies the number of intermediate outputs
|
||||||
|
of this file.
|
||||||
|
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
|
||||||
|
is the layer name of this layer; ``0`` means this layer has no input;
|
||||||
|
``1`` means this layer has one output; ``in0`` is the output name of
|
||||||
|
this layer.
|
||||||
|
|
||||||
|
We need to add 1 extra line and also increment the number of layers.
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1061 1342
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation**
|
||||||
|
|
||||||
|
1. ``7767517``, it is still the same
|
||||||
|
2. ``1061 1342``, we have added an extra layer, so we need to update ``1060`` to ``1061``.
|
||||||
|
We don't need to change ``1342`` since the newly added layer has no inputs or outputs.
|
||||||
|
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512``
|
||||||
|
This line is newly added. Its explanation is given below:
|
||||||
|
|
||||||
|
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
|
||||||
|
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
|
||||||
|
- ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``
|
||||||
|
- ``0=1``, 0 is the key and 1 is the value. MUST be ``0=1``
|
||||||
|
- ``1=12``, 1 is the key and 12 is the value of the
|
||||||
|
parameter ``--num-encoder-layers`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``2=32``, 2 is the key and 32 is the value of the
|
||||||
|
parameter ``--memory-size`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``3=31``, 3 is the key and 31 is the value of the
|
||||||
|
parameter ``--cnn-module-kernel`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``4=8``, 4 is the key and 8 is the value of the
|
||||||
|
parameter ``--left-context-length`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``5=32``, 5 is the key and 32 is the value of the
|
||||||
|
parameter ``--chunk-length`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``6=8``, 6 is the key and 8 is the value of the
|
||||||
|
parameter ``--right-context-length`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``7=512``, 7 is the key and 512 is the value of the
|
||||||
|
parameter ``--encoder-dim`` that you provided when running
|
||||||
|
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
|
||||||
|
For ease of reference, we list the key-value pairs that you need to add
|
||||||
|
in the following table. If your model has a different setting, please
|
||||||
|
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
|
||||||
|
will be ``SAD``.
|
||||||
|
|
||||||
|
+------+-----------------------------+
|
||||||
|
| key | value |
|
||||||
|
+======+=============================+
|
||||||
|
| 0 | 1 (fixed) |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 1 | ``--num-encoder-layers`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 2 | ``--memory-size`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 3 | ``--cnn-module-kernel`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 4 | ``--left-context-length`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 5 | ``--chunk-length`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 6 | ``--right-context-length`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 7 | ``--encoder-dim`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
|
||||||
|
4. ``Input in0 0 1 in0``. No need to change it.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
When you add a new layer ``SherpaMetaData``, please remember to update the
|
||||||
|
number of layers. In our case, update ``1060`` to ``1061``. Otherwise,
|
||||||
|
you will be SAD later.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
After adding the new layer ``SherpaMetaData``, you cannot use this model
|
||||||
|
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
|
||||||
|
supported only in `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
|
||||||
|
the ``param`` file! You don't need to change the ``bin`` file.
|
||||||
|
|
||||||
|
Now you can use this model in `sherpa-ncnn`_.
|
||||||
|
Please refer to the following documentation:
|
||||||
|
|
||||||
|
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
|
||||||
|
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
|
||||||
|
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
|
||||||
|
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
|
||||||
|
|
||||||
|
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
|
||||||
|
|
||||||
|
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
|
||||||
|
|
||||||
|
You can find more usages there.
|
||||||
|
|
||||||
|
7. (Optional) int8 quantization with sherpa-ncnn
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
This step is optional.
|
||||||
|
|
||||||
|
In this step, we describe how to quantize our model with ``int8``.
|
||||||
|
|
||||||
|
Change :ref:`conv-emformer-step-4-export-torchscript-model-via-pnnx` to
|
||||||
|
disable ``fp16`` when using ``pnnx``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
|
||||||
|
support quantizing the decoder model yet. We will update this documentation
|
||||||
|
once `ncnn`_ supports it (maybe this year, 2023).
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*_jit_trace-pnnx.ncnn.{param,bin}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
Let us compare again the file sizes:
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+========================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes are doubled when we disable ``fp16``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
|
||||||
|
|
||||||
|
Next, follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Change
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1060 1342
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
to
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
1061 1342
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to change the values for ``SherpaMetaData`` if your model uses a different setting.
|
||||||
|
|
||||||
|
|
||||||
|
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
|
||||||
|
`sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# We will download sherpa-ncnn to $HOME/open-source/
|
||||||
|
# You can change it to anywhere you like.
|
||||||
|
cd $HOME
|
||||||
|
mkdir -p open-source
|
||||||
|
|
||||||
|
cd open-source
|
||||||
|
git clone https://github.com/k2-fsa/sherpa-ncnn
|
||||||
|
cd sherpa-ncnn
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make -j 4
|
||||||
|
|
||||||
|
./bin/generate-int8-scale-table
|
||||||
|
|
||||||
|
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
|
||||||
|
|
||||||
|
The output of the above commands is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
(py38) kuangfangjun:build$ generate-int8-scale-table
|
||||||
|
Please provide 10 arg. Currently given: 1
|
||||||
|
Usage:
|
||||||
|
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
|
||||||
|
|
||||||
|
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
|
||||||
|
|
||||||
|
We need to create a file ``wave_filenames.txt``, in which we need to put
|
||||||
|
some calibration wave files. For testing purposes, we put the ``test_wavs``
|
||||||
|
from the pre-trained model repository `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
cat <<EOF > wave_filenames.txt
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
../test_wavs/1221-135766-0001.wav
|
||||||
|
../test_wavs/1221-135766-0002.wav
|
||||||
|
EOF
|
||||||
|
|
||||||
|
Now we can calculate the scales needed for quantization with the calibration data:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
generate-int8-scale-table \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder-scale-table.txt \
|
||||||
|
./joiner-scale-table.txt \
|
||||||
|
./wave_filenames.txt
|
||||||
|
|
||||||
|
The output logs are as follows:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/generate-int-8-scale-table-for-conv-emformer.txt
|
||||||
|
|
||||||
|
It generates the following two files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ ls -lh encoder-scale-table.txt joiner-scale-table.txt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 955K Jan 11 17:28 encoder-scale-table.txt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 18K Jan 11 17:28 joiner-scale-table.txt
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
In practice, you definitely need more calibration data to compute the scale table.
|
||||||
|
|
||||||
|
Finally, let us use the scale table to quantize our models into ``int8``.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8
|
||||||
|
|
||||||
|
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
|
||||||
|
|
||||||
|
First, we quantize the encoder model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./encoder-scale-table.txt
|
||||||
|
|
||||||
|
Next, we quantize the joiner model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./joiner-scale-table.txt
|
||||||
|
|
||||||
|
The above two commands generate the following 4 files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 99M Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 78K Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 774K Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 496 Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
|
||||||
|
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
|
||||||
|
|
||||||
|
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
|
||||||
|
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
|
||||||
|
|
||||||
|
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
|
||||||
|
replace the following invocation:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
with
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
|
||||||
|
The following table compares the file sizes again:
|
||||||
|
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+========================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.int8.bin | 99 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the model after ``int8`` quantization
|
||||||
|
are much smaller.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Currently, only linear layers and convolutional layers are quantized
|
||||||
|
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You need to test the recognition accuracy after ``int8`` quantization.
|
||||||
|
|
||||||
|
You can find the speed comparison at `<https://github.com/k2-fsa/sherpa-ncnn/issues/44>`_.
|
||||||
|
|
||||||
|
|
||||||
|
That's it! Have fun with `sherpa-ncnn`_!
|
||||||
644
docs/source/model-export/export-ncnn-lstm.rst
Normal file
@ -0,0 +1,644 @@
|
|||||||
|
.. _export_lstm_transducer_models_to_ncnn:
|
||||||
|
|
||||||
|
Export LSTM transducer models to ncnn
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
We use the pre-trained model from the following repository as an example:
|
||||||
|
|
||||||
|
`<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
|
||||||
|
|
||||||
|
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
|
||||||
|
may ``not`` work.
|
||||||
|
|
||||||
|
1. Download the pre-trained model
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
You have to install `git-lfs`_ before you continue.
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
|
||||||
|
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
|
||||||
|
|
||||||
|
In the above code, we downloaded the pre-trained model into the directory
|
||||||
|
``egs/librispeech/ASR/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03``.
|
||||||
|
|
||||||
|
2. Install ncnn and pnnx
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
|
||||||
|
|
||||||
|
|
||||||
|
3. Export the model via torch.jit.trace()
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
First, let us rename our pre-trained model:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp
|
||||||
|
|
||||||
|
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
|
||||||
|
Next, we use the following code to export our model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
dir=./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
||||||
|
|
||||||
|
./lstm_transducer_stateless2/export-for-ncnn.py \
|
||||||
|
--exp-dir $dir/exp \
|
||||||
|
--bpe-model $dir/data/lang_bpe_500/bpe.model \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--num-encoder-layers 12 \
|
||||||
|
--encoder-dim 512 \
|
||||||
|
--rnn-hidden-size 1024
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
|
||||||
|
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
|
||||||
|
|
||||||
|
If you have trained a model by yourself and if you have all checkpoints
|
||||||
|
available, please first use ``decode.py`` to tune ``--epoch --avg``
|
||||||
|
and select the best combination with ``--use-averaged-model 1``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You will see the following log output:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/export-lstm-transducer-for-ncnn-output.txt
|
||||||
|
|
||||||
|
The log shows the model has ``84176356`` parameters, i.e., ``~84 M``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 324M Feb 17 10:34 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
|
||||||
|
|
||||||
|
You can see that the file size of the pre-trained model is ``324 MB``, which
|
||||||
|
is roughly equal to ``84176356*4/1024/1024 = 321.107 MB``.
|
||||||
|
|
||||||
|
After running ``lstm_transducer_stateless2/export-for-ncnn.py``,
|
||||||
|
we will get the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*pnnx.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1010K Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 318M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
|
||||||
|
.. _lstm-transducer-step-4-export-torchscript-model-via-pnnx:
|
||||||
|
|
||||||
|
4. Export torchscript model via pnnx
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Make sure you have set up the ``PATH`` environment variable
|
||||||
|
in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
|
||||||
|
it will throw an error saying that ``pnnx`` could not be found.
|
||||||
|
|
||||||
|
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*ncnn*{bin,param}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 159M Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1.5M Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
|
||||||
|
There are two types of files:
|
||||||
|
|
||||||
|
- ``param``: It is a text file containing the model architectures. You can
|
||||||
|
use a text editor to view its content.
|
||||||
|
- ``bin``: It is a binary file containing the model parameters.
|
||||||
|
|
||||||
|
We compare the file sizes of the models below before and after converting via ``pnnx``:
|
||||||
|
|
||||||
|
.. see https://tableconvert.com/restructuredtext-generator
|
||||||
|
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+==================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 318 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin | 159 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the models after conversion are about one half
|
||||||
|
of the models before conversion:
|
||||||
|
|
||||||
|
- encoder: 318 MB vs 159 MB
|
||||||
|
- decoder: 1010 KB vs 503 KB
|
||||||
|
- joiner: 3.0 MB vs 1.5 MB
|
||||||
|
|
||||||
|
The reason is that by default ``pnnx`` converts ``float32`` parameters
|
||||||
|
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
|
||||||
|
for ``float16``. Thus, the file size is halved after conversion.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
|
||||||
|
won't convert ``float32`` to ``float16``.
|
||||||
|
|
||||||
|
5. Test the exported models in icefall
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We assume you have set up the environment variable ``PYTHONPATH`` when
|
||||||
|
building `ncnn`_.
|
||||||
|
|
||||||
|
Now we have successfully converted our pre-trained model to `ncnn`_ format.
|
||||||
|
The generated 6 files are what we need. You can use the following code to
|
||||||
|
test the converted models:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
|
||||||
|
--tokens ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
|
||||||
|
only 1 wave file as input.
|
||||||
|
|
||||||
|
The output is given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/test-streaming-ncnn-decode-lstm-transducer-libri.txt
|
||||||
|
|
||||||
|
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
|
||||||
|
|
||||||
|
.. _lstm-modify-the-exported-encoder-for-sherpa-ncnn:
|
||||||
|
|
||||||
|
6. Modify the exported encoder for sherpa-ncnn
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
In order to use the exported models in `sherpa-ncnn`_, we have to modify
|
||||||
|
``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
7767517
|
||||||
|
267 379
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation** of the above three lines:
|
||||||
|
|
||||||
|
1. ``7767517``, it is a magic number and should not be changed.
|
||||||
|
2. ``267 379``, the first number ``267`` specifies the number of layers
|
||||||
|
in this file, while ``379`` specifies the number of intermediate outputs
|
||||||
|
of this file
|
||||||
|
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
|
||||||
|
is the layer name of this layer; ``0`` means this layer has no input;
|
||||||
|
``1`` means this layer has one output; ``in0`` is the output name of
|
||||||
|
this layer.
|
||||||
|
|
||||||
|
We need to add 1 extra line and also increment the number of layers.
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
268 379
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation**
|
||||||
|
|
||||||
|
1. ``7767517``, it is still the same
|
||||||
|
2. ``268 379``, we have added an extra layer, so we need to update ``267`` to ``268``.
|
||||||
|
We don't need to change ``379`` since the newly added layer has no inputs or outputs.
|
||||||
|
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024``
|
||||||
|
This line is newly added. Its explanation is given below:
|
||||||
|
|
||||||
|
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
|
||||||
|
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
|
||||||
|
- ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``.
|
||||||
|
- ``0=3``, 0 is the key and 3 is the value. MUST be ``0=3``
|
||||||
|
- ``1=12``, 1 is the key and 12 is the value of the
|
||||||
|
parameter ``--num-encoder-layers`` that you provided when running
|
||||||
|
``./lstm_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``2=512``, 2 is the key and 512 is the value of the
|
||||||
|
parameter ``--encoder-dim`` that you provided when running
|
||||||
|
``./lstm_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
- ``3=1024``, 3 is the key and 1024 is the value of the
|
||||||
|
parameter ``--rnn-hidden-size`` that you provided when running
|
||||||
|
``./lstm_transducer_stateless2/export-for-ncnn.py``.
|
||||||
|
|
||||||
|
For ease of reference, we list the key-value pairs that you need to add
|
||||||
|
in the following table. If your model has a different setting, please
|
||||||
|
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
|
||||||
|
will be ``SAD``.
|
||||||
|
|
||||||
|
+------+-----------------------------+
|
||||||
|
| key | value |
|
||||||
|
+======+=============================+
|
||||||
|
| 0 | 3 (fixed) |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 1 | ``--num-encoder-layers`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 2 | ``--encoder-dim`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
| 3 | ``--rnn-hidden-size`` |
|
||||||
|
+------+-----------------------------+
|
||||||
|
|
||||||
|
4. ``Input in0 0 1 in0``. No need to change it.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
When you add a new layer ``SherpaMetaData``, please remember to update the
|
||||||
|
number of layers. In our case, update ``267`` to ``268``. Otherwise,
|
||||||
|
you will be SAD later.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
After adding the new layer ``SherpaMetaData``, you cannot use this model
|
||||||
|
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
|
||||||
|
supported only in `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
|
||||||
|
the ``param`` file! You don't need to change the ``bin`` file.
|
||||||
|
|
||||||
|
Now you can use this model in `sherpa-ncnn`_.
|
||||||
|
Please refer to the following documentation:
|
||||||
|
|
||||||
|
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
|
||||||
|
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
|
||||||
|
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
|
||||||
|
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
|
||||||
|
|
||||||
|
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
|
||||||
|
|
||||||
|
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
|
||||||
|
|
||||||
|
You can find more usages there.
|
||||||
|
|
||||||
|
7. (Optional) int8 quantization with sherpa-ncnn
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
This step is optional.
|
||||||
|
|
||||||
|
In this step, we describe how to quantize our model with ``int8``.
|
||||||
|
|
||||||
|
Change :ref:`lstm-transducer-step-4-export-torchscript-model-via-pnnx` to
|
||||||
|
disable ``fp16`` when using ``pnnx``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
|
||||||
|
support quantizing the decoder model yet. We will update this documentation
|
||||||
|
once `ncnn`_ supports it. (Maybe this year, 2023).
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*_jit_trace-pnnx.ncnn.{param,bin}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 317M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
|
||||||
|
Let us compare the file sizes again:
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+========================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 318 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes are doubled when we disable ``fp16``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
|
||||||
|
|
||||||
|
Next, follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Change
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
267 379
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
to
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
268 379
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
|
||||||
|
to change the values for ``SherpaMetaData`` if your model uses a different setting.
|
||||||
|
|
||||||
|
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
|
||||||
|
`sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
# We will download sherpa-ncnn to $HOME/open-source/
|
||||||
|
# You can change it to anywhere you like.
|
||||||
|
cd $HOME
|
||||||
|
mkdir -p open-source
|
||||||
|
|
||||||
|
cd open-source
|
||||||
|
git clone https://github.com/k2-fsa/sherpa-ncnn
|
||||||
|
cd sherpa-ncnn
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake ..
|
||||||
|
make -j 4
|
||||||
|
|
||||||
|
./bin/generate-int8-scale-table
|
||||||
|
|
||||||
|
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
|
||||||
|
|
||||||
|
The output of the above commands is:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
(py38) kuangfangjun:build$ generate-int8-scale-table
|
||||||
|
Please provide 10 arg. Currently given: 1
|
||||||
|
Usage:
|
||||||
|
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
|
||||||
|
|
||||||
|
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
|
||||||
|
|
||||||
|
We need to create a file ``wave_filenames.txt``, in which we need to put
|
||||||
|
some calibration wave files. For testing purposes, we use the ``test_wavs``
|
||||||
|
from the pre-trained model repository
|
||||||
|
`<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
cat <<EOF > wave_filenames.txt
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
../test_wavs/1221-135766-0001.wav
|
||||||
|
../test_wavs/1221-135766-0002.wav
|
||||||
|
EOF
|
||||||
|
|
||||||
|
Now we can calculate the scales needed for quantization with the calibration data:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
generate-int8-scale-table \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder-scale-table.txt \
|
||||||
|
./joiner-scale-table.txt \
|
||||||
|
./wave_filenames.txt
|
||||||
|
|
||||||
|
The output logs are given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/generate-int-8-scale-table-for-lstm.txt
|
||||||
|
|
||||||
|
It generates the following two files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh encoder-scale-table.txt joiner-scale-table.txt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 345K Feb 17 12:13 encoder-scale-table.txt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 17K Feb 17 12:13 joiner-scale-table.txt
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Definitely, you need more calibration data to compute the scale table.
|
||||||
|
|
||||||
|
Finally, let us use the scale table to quantize our models into ``int8``.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8
|
||||||
|
|
||||||
|
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
|
||||||
|
|
||||||
|
First, we quantize the encoder model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./encoder-scale-table.txt
|
||||||
|
|
||||||
|
Next, we quantize the joiner model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ncnn2int8 \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./joiner-scale-table.txt
|
||||||
|
|
||||||
|
The above two commands generate the following 4 files:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 218M Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 774K Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 496 Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.param
|
||||||
|
|
||||||
|
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
|
||||||
|
|
||||||
|
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
|
||||||
|
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
|
||||||
|
|
||||||
|
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
|
||||||
|
replace the following invocation:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
with
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
|
||||||
|
|
||||||
|
sherpa-ncnn \
|
||||||
|
../data/lang_bpe_500/tokens.txt \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.param \
|
||||||
|
./encoder_jit_trace-pnnx.ncnn.int8.bin \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
./decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
./joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
../test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
The following table compares the file sizes again:
|
||||||
|
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 318 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1010 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.int8.bin | 218 MB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
|
||||||
|
+----------------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file size of the joiner model after ``int8`` quantization
|
||||||
|
is much smaller. However, the size of the encoder model is even larger than
|
||||||
|
the ``fp16`` counterpart. The reason is that `ncnn`_ currently does not support
|
||||||
|
quantizing ``LSTM`` layers into ``8-bit``. Please see
|
||||||
|
`<https://github.com/Tencent/ncnn/issues/4532>`_
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Currently, only linear layers and convolutional layers are quantized
|
||||||
|
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You need to test the recognition accuracy after ``int8`` quantization.
|
||||||
|
|
||||||
|
|
||||||
|
That's it! Have fun with `sherpa-ncnn`_!
|
||||||
383
docs/source/model-export/export-ncnn-zipformer.rst
Normal file
@ -0,0 +1,383 @@
|
|||||||
|
.. _export_streaming_zipformer_transducer_models_to_ncnn:
|
||||||
|
|
||||||
|
Export streaming Zipformer transducer models to ncnn
|
||||||
|
----------------------------------------------------
|
||||||
|
|
||||||
|
We use the pre-trained model from the following repository as an example:
|
||||||
|
|
||||||
|
`<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_
|
||||||
|
|
||||||
|
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
|
||||||
|
may ``not`` work.
|
||||||
|
|
||||||
|
1. Download the pre-trained model
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
You have to install `git-lfs`_ before you continue.
|
||||||
|
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
|
||||||
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
|
|
||||||
|
cd ..
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
|
||||||
|
|
||||||
|
In the above code, we downloaded the pre-trained model into the directory
|
||||||
|
``egs/librispeech/ASR/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29``.
|
||||||
|
|
||||||
|
2. Install ncnn and pnnx
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
|
||||||
|
|
||||||
|
|
||||||
|
3. Export the model via torch.jit.trace()
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
First, let us rename our pre-trained model:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd egs/librispeech/ASR
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
|
||||||
|
|
||||||
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
|
||||||
|
cd ../..
|
||||||
|
|
||||||
|
Next, we use the following code to export our model:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
|
||||||
|
--bpe-model $dir/data/lang_bpe_500/bpe.model \
|
||||||
|
--exp-dir $dir/exp \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 99 \
|
||||||
|
--avg 1 \
|
||||||
|
\
|
||||||
|
--decode-chunk-len 32 \
|
||||||
|
--num-left-chunks 4 \
|
||||||
|
--num-encoder-layers "2,4,3,2,4" \
|
||||||
|
--feedforward-dims "1024,1024,2048,2048,1024" \
|
||||||
|
--nhead "8,8,8,8,8" \
|
||||||
|
--encoder-dims "384,384,384,384,384" \
|
||||||
|
--attention-dims "192,192,192,192,192" \
|
||||||
|
--encoder-unmasked-dims "256,256,256,256,256" \
|
||||||
|
--zipformer-downsampling-factors "1,2,4,8,2" \
|
||||||
|
--cnn-module-kernels "31,31,31,31,31" \
|
||||||
|
--decoder-dim 512 \
|
||||||
|
--joiner-dim 512
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
If your model has different configuration parameters, please change them accordingly.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
|
||||||
|
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
|
||||||
|
|
||||||
|
If you have trained a model by yourself and if you have all checkpoints
|
||||||
|
available, please first use ``decode.py`` to tune ``--epoch --avg``
|
||||||
|
and select the best combination with ``--use-averaged-model 1``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
You will see the following log output:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/export-zipformer-transducer-for-ncnn-output.txt
|
||||||
|
|
||||||
|
The log shows the model has ``69920376`` parameters, i.e., ``~69.9 M``.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 269M Jan 12 12:53 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt
|
||||||
|
|
||||||
|
You can see that the file size of the pre-trained model is ``269 MB``, which
|
||||||
|
is roughly equal to ``69920376*4/1024/1024 = 266.725 MB``.
|
||||||
|
|
||||||
|
After running ``pruned_transducer_stateless7_streaming/export-for-ncnn.py``,
|
||||||
|
we will get the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*pnnx.pt
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1022K Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 266M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 2.8M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
.. _zipformer-transducer-step-4-export-torchscript-model-via-pnnx:
|
||||||
|
|
||||||
|
4. Export torchscript model via pnnx
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
Make sure you have set up the ``PATH`` environment variable
|
||||||
|
in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
|
||||||
|
it will throw an error saying that ``pnnx`` could not be found.
|
||||||
|
|
||||||
|
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/
|
||||||
|
|
||||||
|
pnnx ./encoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./decoder_jit_trace-pnnx.pt
|
||||||
|
pnnx ./joiner_jit_trace-pnnx.pt
|
||||||
|
|
||||||
|
It will generate the following files:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*ncnn*{bin,param}
|
||||||
|
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 509K Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 437 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 133M Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 152K Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 1.4M Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin
|
||||||
|
-rw-r--r-- 1 kuangfangjun root 488 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param
|
||||||
|
|
||||||
|
There are two types of files:
|
||||||
|
|
||||||
|
- ``param``: It is a text file containing the model architectures. You can
|
||||||
|
use a text editor to view its content.
|
||||||
|
- ``bin``: It is a binary file containing the model parameters.
|
||||||
|
|
||||||
|
We compare the file sizes of the models below before and after converting via ``pnnx``:
|
||||||
|
|
||||||
|
.. see https://tableconvert.com/restructuredtext-generator
|
||||||
|
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| File name | File size |
|
||||||
|
+==================================+============+
|
||||||
|
| encoder_jit_trace-pnnx.pt | 266 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.pt | 1022 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.pt | 2.8 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| encoder_jit_trace-pnnx.ncnn.bin | 133 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| decoder_jit_trace-pnnx.ncnn.bin | 509 KB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
| joiner_jit_trace-pnnx.ncnn.bin | 1.4 MB |
|
||||||
|
+----------------------------------+------------+
|
||||||
|
|
||||||
|
You can see that the file sizes of the models after conversion are about one half
|
||||||
|
of the models before conversion:
|
||||||
|
|
||||||
|
- encoder: 266 MB vs 133 MB
|
||||||
|
- decoder: 1022 KB vs 509 KB
|
||||||
|
- joiner: 2.8 MB vs 1.4 MB
|
||||||
|
|
||||||
|
The reason is that by default ``pnnx`` converts ``float32`` parameters
|
||||||
|
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
|
||||||
|
for ``float16``. Thus, the file size is halved after conversion.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
|
||||||
|
won't convert ``float32`` to ``float16``.
|
||||||
|
|
||||||
|
5. Test the exported models in icefall
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We assume you have set up the environment variable ``PYTHONPATH`` when
|
||||||
|
building `ncnn`_.
|
||||||
|
|
||||||
|
Now we have successfully converted our pre-trained model to `ncnn`_ format.
|
||||||
|
The generated 6 files are what we need. You can use the following code to
|
||||||
|
test the converted models:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
|
||||||
|
--tokens ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt \
|
||||||
|
--encoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--encoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--decoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param \
|
||||||
|
--decoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
||||||
|
--joiner-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param \
|
||||||
|
--joiner-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
||||||
|
./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
|
||||||
|
only 1 wave file as input.
|
||||||
|
|
||||||
|
The output is given below:
|
||||||
|
|
||||||
|
.. literalinclude:: ./code/test-streaming-ncnn-decode-zipformer-transducer-libri.txt
|
||||||
|
|
||||||
|
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
|
||||||
|
|
||||||
|
.. _zipformer-modify-the-exported-encoder-for-sherpa-ncnn:
|
||||||
|
|
||||||
|
6. Modify the exported encoder for sherpa-ncnn
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
In order to use the exported models in `sherpa-ncnn`_, we have to modify
|
||||||
|
``encoder_jit_trace-pnnx.ncnn.param``.
|
||||||
|
|
||||||
|
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
|
||||||
|
|
||||||
|
.. code-block::
|
||||||
|
|
||||||
|
7767517
|
||||||
|
2028 2547
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation** of the above three lines:
|
||||||
|
|
||||||
|
1. ``7767517``, it is a magic number and should not be changed.
|
||||||
|
2. ``2028 2547``, the first number ``2028`` specifies the number of layers
|
||||||
|
in this file, while ``2547`` specifies the number of intermediate outputs
|
||||||
|
of this file
|
||||||
|
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
|
||||||
|
is the layer name of this layer; ``0`` means this layer has no input;
|
||||||
|
``1`` means this layer has one output; ``in0`` is the output name of
|
||||||
|
this layer.
|
||||||
|
|
||||||
|
We need to add 1 extra line and also increment the number of layers.
|
||||||
|
The result looks like below:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
7767517
|
||||||
|
2029 2547
|
||||||
|
SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31
|
||||||
|
Input in0 0 1 in0
|
||||||
|
|
||||||
|
**Explanation**
|
||||||
|
|
||||||
|
1. ``7767517``, it is still the same
|
||||||
|
2. ``2029 2547``, we have added an extra layer, so we need to update ``2028`` to ``2029``.
|
||||||
|
We don't need to change ``2547`` since the newly added layer has no inputs or outputs.
|
||||||
|
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31``
|
||||||
|
This line is newly added. Its explanation is given below:
|
||||||
|
|
||||||
|
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
|
||||||
|
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
|
||||||
|
- ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``.
|
||||||
|
- ``0=2``, 0 is the key and 2 is the value. MUST be ``0=2``
|
||||||
|
- ``1=32``, 1 is the key and 32 is the value of the
|
||||||
|
parameter ``--decode-chunk-len`` that you provided when running
|
||||||
|
``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
- ``2=4``, 2 is the key and 4 is the value of the
|
||||||
|
parameter ``--num-left-chunks`` that you provided when running
|
||||||
|
``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
- ``3=7``, 3 is the key and 7 is the value for the amount of padding
|
||||||
|
used in the Conv2DSubsampling layer. It should be 7 for zipformer
|
||||||
|
if you don't change zipformer.py.
|
||||||
|
- ``-23316=5,2,4,3,2,4``, attribute 16, this is an array attribute.
|
||||||
|
It is attribute 16 since -23300 - (-23316) = 16.
|
||||||
|
The first element of the array is the length of the array, which is 5 in our case.
|
||||||
|
``2,4,3,2,4`` is the value of ``--num-encoder-layers`` that you provided
|
||||||
|
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
- ``-23317=5,384,384,384,384,384``, attribute 17.
|
||||||
|
The first element of the array is the length of the array, which is 5 in our case.
|
||||||
|
``384,384,384,384,384`` is the value of ``--encoder-dims`` that you provided
|
||||||
|
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
- ``-23318=5,192,192,192,192,192``, attribute 18.
|
||||||
|
The first element of the array is the length of the array, which is 5 in our case.
|
||||||
|
``192,192,192,192,192`` is the value of ``--attention-dims`` that you provided
|
||||||
|
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
- ``-23319=5,1,2,4,8,2``, attribute 19.
|
||||||
|
The first element of the array is the length of the array, which is 5 in our case.
|
||||||
|
``1,2,4,8,2`` is the value of ``--zipformer-downsampling-factors`` that you provided
|
||||||
|
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
- ``-23320=5,31,31,31,31,31``, attribute 20.
|
||||||
|
The first element of the array is the length of the array, which is 5 in our case.
|
||||||
|
``31,31,31,31,31`` is the value of ``--cnn-module-kernels`` that you provided
|
||||||
|
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
|
||||||
|
|
||||||
|
For ease of reference, we list the key-value pairs that you need to add
|
||||||
|
in the following table. If your model has a different setting, please
|
||||||
|
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
|
||||||
|
will be ``SAD``.
|
||||||
|
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
| key | value |
|
||||||
|
+==========+============================================+
|
||||||
|
| 0 | 2 (fixed) |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
| 1 | ``--decode-chunk-len`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
| 2 | ``--num-left-chunks`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
| 3 | 7 (if you don't change code) |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
|-23316 | ``--num-encoder-layers`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
|-23317 | ``--encoder-dims`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
|-23318 | ``--attention-dims`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
|-23319 | ``--zipformer-downsampling-factors`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
|-23320 | ``--cnn-module-kernels`` |
|
||||||
|
+----------+--------------------------------------------+
|
||||||
|
|
||||||
|
4. ``Input in0 0 1 in0``. No need to change it.
|
||||||
|
|
||||||
|
.. caution::
|
||||||
|
|
||||||
|
When you add a new layer ``SherpaMetaData``, please remember to update the
|
||||||
|
number of layers. In our case, update ``2028`` to ``2029``. Otherwise,
|
||||||
|
you will be SAD later.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
After adding the new layer ``SherpaMetaData``, you cannot use this model
|
||||||
|
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
|
||||||
|
supported only in `sherpa-ncnn`_.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
|
||||||
|
the ``param`` file! You don't need to change the ``bin`` file.
|
||||||
|
|
||||||
|
Now you can use this model in `sherpa-ncnn`_.
|
||||||
|
Please refer to the following documentation:
|
||||||
|
|
||||||
|
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
|
||||||
|
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
|
||||||
|
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
|
||||||
|
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
|
||||||
|
|
||||||
|
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
|
||||||
|
|
||||||
|
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
|
||||||
|
|
||||||
|
You can find more usages there.
|
||||||
@ -1,12 +1,37 @@
|
|||||||
Export to ncnn
|
Export to ncnn
|
||||||
==============
|
==============
|
||||||
|
|
||||||
We support exporting LSTM transducer models to `ncnn <https://github.com/tencent/ncnn>`_.
|
We support exporting the following models
|
||||||
|
to `ncnn <https://github.com/tencent/ncnn>`_:
|
||||||
|
|
||||||
Please refer to :ref:`export-model-for-ncnn` for details.
|
- `Zipformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_
|
||||||
|
|
||||||
We also provide `<https://github.com/k2-fsa/sherpa-ncnn>`_
|
- `LSTM transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2>`_
|
||||||
performing speech recognition using ``ncnn`` with exported models.
|
|
||||||
It has been tested on Linux, macOS, Windows, and Raspberry Pi. The project is
|
- `ConvEmformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conv_emformer_transducer_stateless2>`_
|
||||||
self-contained and can be statically linked to produce a binary containing
|
|
||||||
everything needed.
|
We also provide `sherpa-ncnn`_
|
||||||
|
for performing speech recognition using `ncnn`_ with exported models.
|
||||||
|
It has been tested on the following platforms:
|
||||||
|
|
||||||
|
- Linux
|
||||||
|
- macOS
|
||||||
|
- Windows
|
||||||
|
- ``Android``
|
||||||
|
- ``iOS``
|
||||||
|
- ``Raspberry Pi``
|
||||||
|
- `爱芯派 <https://wiki.sipeed.com/hardware/zh/>`_ (`MAIX-III AXera-Pi <https://wiki.sipeed.com/hardware/en/maixIII/ax-pi/axpi.html>`_).
|
||||||
|
- `RV1126 <https://www.rock-chips.com/a/en/products/RV11_Series/2020/0427/1076.html>`_
|
||||||
|
|
||||||
|
`sherpa-ncnn`_ is self-contained and can be statically linked to produce
|
||||||
|
a binary containing everything needed. Please refer
|
||||||
|
to its documentation for details:
|
||||||
|
|
||||||
|
- `<https://k2-fsa.github.io/sherpa/ncnn/index.html>`_
|
||||||
|
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
|
||||||
|
export-ncnn-zipformer
|
||||||
|
export-ncnn-conv-emformer
|
||||||
|
export-ncnn-lstm
|
||||||
|
|||||||
@ -1,69 +1,95 @@
|
|||||||
Export to ONNX
|
Export to ONNX
|
||||||
==============
|
==============
|
||||||
|
|
||||||
In this section, we describe how to export models to ONNX.
|
In this section, we describe how to export models to `ONNX`_.
|
||||||
|
|
||||||
|
In each recipe, there is a file called ``export-onnx.py``, which is used
|
||||||
|
to export trained models to `ONNX`_.
|
||||||
|
|
||||||
|
There is also a file named ``onnx_pretrained.py``, which you can use
|
||||||
|
to run the exported `ONNX`_ model in Python with `onnxruntime`_ to decode sound files.
|
||||||
|
|
||||||
|
sherpa-onnx
|
||||||
|
-----------
|
||||||
|
|
||||||
|
We have a separate repository `sherpa-onnx`_ for deploying your exported models
|
||||||
|
on various platforms such as:
|
||||||
|
|
||||||
|
- iOS
|
||||||
|
- Android
|
||||||
|
- Raspberry Pi
|
||||||
|
- Linux/macOS/Windows
|
||||||
|
|
||||||
|
|
||||||
|
Please see the documentation of `sherpa-onnx`_ for details:
|
||||||
|
|
||||||
|
`<https://k2-fsa.github.io/sherpa/onnx/index.html>`_
|
||||||
|
|
||||||
|
Example
|
||||||
|
-------
|
||||||
|
|
||||||
|
In the following, we demonstrate how to export a streaming Zipformer pre-trained
|
||||||
|
model from
|
||||||
|
`<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_
|
||||||
|
to `ONNX`_.
|
||||||
|
|
||||||
|
Download the pre-trained model
|
||||||
|
------------------------------
|
||||||
|
|
||||||
.. hint::
|
.. hint::
|
||||||
|
|
||||||
Only non-streaming conformer transducer models are tested.
|
We assume you have installed `git-lfs`_.
|
||||||
|
|
||||||
|
|
||||||
When to use it
|
|
||||||
--------------
|
|
||||||
|
|
||||||
If you want to use an inference framework that supports ONNX
|
|
||||||
to run the pretrained model.
|
|
||||||
|
|
||||||
|
|
||||||
How to export
|
|
||||||
-------------
|
|
||||||
|
|
||||||
We use
|
|
||||||
`<https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless3>`_
|
|
||||||
as an example in the following.
|
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
cd egs/librispeech/ASR
|
|
||||||
epoch=14
|
|
||||||
avg=2
|
|
||||||
|
|
||||||
./pruned_transducer_stateless3/export.py \
|
cd egs/librispeech/ASR
|
||||||
--exp-dir ./pruned_transducer_stateless3/exp \
|
|
||||||
--bpe-model data/lang_bpe_500/bpe.model \
|
|
||||||
--epoch $epoch \
|
|
||||||
--avg $avg \
|
|
||||||
--onnx 1
|
|
||||||
|
|
||||||
It will generate the following files inside ``pruned_transducer_stateless3/exp``:
|
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||||
|
repo=$(basename $repo_url)
|
||||||
|
|
||||||
- ``encoder.onnx``
|
pushd $repo
|
||||||
- ``decoder.onnx``
|
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||||
- ``joiner.onnx``
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
- ``joiner_encoder_proj.onnx``
|
cd exp
|
||||||
- ``joiner_decoder_proj.onnx``
|
ln -s pretrained.pt epoch-99.pt
|
||||||
|
popd
|
||||||
|
|
||||||
You can use ``./pruned_transducer_stateless3/exp/onnx_pretrained.py`` to decode
|
Export the model to ONNX
|
||||||
waves with the generated files:
|
------------------------
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
./pruned_transducer_stateless3/onnx_pretrained.py \
|
./pruned_transducer_stateless7_streaming/export-onnx.py \
|
||||||
--bpe-model ./data/lang_bpe_500/bpe.model \
|
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
||||||
--encoder-model-filename ./pruned_transducer_stateless3/exp/encoder.onnx \
|
--use-averaged-model 0 \
|
||||||
--decoder-model-filename ./pruned_transducer_stateless3/exp/decoder.onnx \
|
--epoch 99 \
|
||||||
--joiner-model-filename ./pruned_transducer_stateless3/exp/joiner.onnx \
|
--avg 1 \
|
||||||
--joiner-encoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_encoder_proj.onnx \
|
--decode-chunk-len 32 \
|
||||||
--joiner-decoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_decoder_proj.onnx \
|
--exp-dir $repo/exp/
|
||||||
/path/to/foo.wav \
|
|
||||||
/path/to/bar.wav \
|
|
||||||
/path/to/baz.wav
|
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
|
||||||
How to use the exported model
|
``export-onnx.py`` from different recipes has different options.
|
||||||
-----------------------------
|
|
||||||
|
|
||||||
We also provide `<https://github.com/k2-fsa/sherpa-onnx>`_
|
In the above example, ``--decode-chunk-len`` is specific to the
|
||||||
performing speech recognition using `onnxruntime <https://github.com/microsoft/onnxruntime>`_
|
streaming Zipformer. Other models won't have such an option.
|
||||||
with exported models.
|
|
||||||
It has been tested on Linux, macOS, and Windows.
|
It will generate the following 3 files in ``$repo/exp``
|
||||||
|
|
||||||
|
- ``encoder-epoch-99-avg-1.onnx``
|
||||||
|
- ``decoder-epoch-99-avg-1.onnx``
|
||||||
|
- ``joiner-epoch-99-avg-1.onnx``
|
||||||
|
|
||||||
|
Decode sound files with exported ONNX models
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
.. _export-model-with-torch-jit-script:
|
.. _export-model-with-torch-jit-script:
|
||||||
|
|
||||||
Export model with torch.jit.script()
|
Export model with torch.jit.script()
|
||||||
===================================
|
====================================
|
||||||
|
|
||||||
In this section, we describe how to export a model via
|
In this section, we describe how to export a model via
|
||||||
``torch.jit.script()``.
|
``torch.jit.script()``.
|
||||||
|
|||||||
@ -703,7 +703,7 @@ It will show you the following message:
|
|||||||
|
|
||||||
|
|
||||||
HLG decoding
|
HLG decoding
|
||||||
^^^^^^^^^^^^
|
~~~~~~~~~~~~
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
|
Before Width: | Height: | Size: 334 KiB After Width: | Height: | Size: 334 KiB |
|
Before Width: | Height: | Size: 426 KiB After Width: | Height: | Size: 426 KiB |
|
Before Width: | Height: | Size: 441 KiB After Width: | Height: | Size: 441 KiB |
@ -19,4 +19,3 @@ It can be downloaded from `<https://www.openslr.org/33/>`_
|
|||||||
tdnn_lstm_ctc
|
tdnn_lstm_ctc
|
||||||
conformer_ctc
|
conformer_ctc
|
||||||
stateless_transducer
|
stateless_transducer
|
||||||
|
|
||||||
10
docs/source/recipes/Non-streaming-ASR/index.rst
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
Non Streaming ASR
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
aishell/index
|
||||||
|
librispeech/index
|
||||||
|
timit/index
|
||||||
|
yesno/index
|
||||||
@ -888,7 +888,7 @@ It will show you the following message:
|
|||||||
|
|
||||||
|
|
||||||
CTC decoding
|
CTC decoding
|
||||||
^^^^^^^^^^^^
|
~~~~~~~~~~~~
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
@ -926,7 +926,7 @@ Its output is:
|
|||||||
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||||
|
|
||||||
HLG decoding
|
HLG decoding
|
||||||
^^^^^^^^^^^^
|
~~~~~~~~~~~~
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
@ -966,7 +966,7 @@ The output is:
|
|||||||
|
|
||||||
|
|
||||||
HLG decoding + n-gram LM rescoring
|
HLG decoding + n-gram LM rescoring
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
@ -1012,7 +1012,7 @@ The output is:
|
|||||||
|
|
||||||
|
|
||||||
HLG decoding + n-gram LM rescoring + attention decoder rescoring
|
HLG decoding + n-gram LM rescoring + attention decoder rescoring
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
@ -0,0 +1,223 @@
|
|||||||
|
Distillation with HuBERT
|
||||||
|
========================
|
||||||
|
|
||||||
|
This tutorial shows you how to perform knowledge distillation in `icefall`_
|
||||||
|
with the `LibriSpeech`_ dataset. The distillation method
|
||||||
|
used here is called "Multi Vector Quantization Knowledge Distillation" (MVQ-KD).
|
||||||
|
Please have a look at our paper `Predicting Multi-Codebook Vector Quantization Indexes for Knowledge Distillation <https://arxiv.org/abs/2211.00508>`_
|
||||||
|
for more details about MVQ-KD.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
This tutorial is based on recipe
|
||||||
|
`pruned_transducer_stateless4 <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless4>`_.
|
||||||
|
Currently, we only implement MVQ-KD in this recipe. However, MVQ-KD is theoretically applicable to all recipes
|
||||||
|
with only minor changes needed. Feel free to try out MVQ-KD in different recipes. If you
|
||||||
|
encounter any problems, please open an issue here `icefall <https://github.com/k2-fsa/icefall/issues>`_.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
We assume you have read the page :ref:`install icefall` and have set up
|
||||||
|
the environment for `icefall`_.
|
||||||
|
|
||||||
|
.. HINT::
|
||||||
|
|
||||||
|
We recommend you to use a GPU or several GPUs to run this recipe.
|
||||||
|
|
||||||
|
Data preparation
|
||||||
|
----------------
|
||||||
|
|
||||||
|
We first prepare necessary training data for `LibriSpeech`_.
|
||||||
|
This is the same as in :ref:`non_streaming_librispeech_pruned_transducer_stateless`.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
The data preparation is the same as other recipes on LibriSpeech dataset,
|
||||||
|
if you have finished this step, you can skip to :ref:`codebook_index_preparation` directly.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ cd egs/librispeech/ASR
|
||||||
|
$ ./prepare.sh
|
||||||
|
|
||||||
|
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||||
|
All you need to do is to run it.
|
||||||
|
|
||||||
|
The data preparation contains several stages. You can use the following two
|
||||||
|
options:
|
||||||
|
|
||||||
|
- ``--stage``
|
||||||
|
- ``--stop-stage``
|
||||||
|
|
||||||
|
to control which stage(s) should be run. By default, all stages are executed.
|
||||||
|
|
||||||
|
For example,
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ cd egs/librispeech/ASR
|
||||||
|
$ ./prepare.sh --stage 0 --stop-stage 0 # run only stage 0
|
||||||
|
$ ./prepare.sh --stage 2 --stop-stage 5 # run from stage 2 to stage 5
|
||||||
|
|
||||||
|
.. HINT::
|
||||||
|
|
||||||
|
If you have pre-downloaded the `LibriSpeech`_
|
||||||
|
dataset and the `musan`_ dataset, say,
|
||||||
|
they are saved in ``/tmp/LibriSpeech`` and ``/tmp/musan``, you can modify
|
||||||
|
the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
|
||||||
|
``./prepare.sh`` won't re-download them.
|
||||||
|
|
||||||
|
.. NOTE::
|
||||||
|
|
||||||
|
All generated files by ``./prepare.sh``, e.g., features, lexicon, etc,
|
||||||
|
are saved in ``./data`` directory.
|
||||||
|
|
||||||
|
We provide the following YouTube video showing how to run ``./prepare.sh``.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe to
|
||||||
|
the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
|
||||||
|
|
||||||
|
`<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
|
||||||
|
|
||||||
|
.. youtube:: ofEIoJL-mGM
|
||||||
|
|
||||||
|
|
||||||
|
.. _codebook_index_preparation:
|
||||||
|
|
||||||
|
Codebook index preparation
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Here, we prepare necessary data for MVQ-KD. This requires the generation
|
||||||
|
of codebook indexes (please read our `paper <https://arxiv.org/abs/2211.00508>`_
|
||||||
|
if you are interested in details). In this tutorial, we use the pre-computed
|
||||||
|
codebook indexes for convenience. The only thing you need to do is to
|
||||||
|
run `./distillation_with_hubert.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/distillation_with_hubert.sh>`_.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
There are 5 stages in total. The first and second stages will be automatically skipped
|
||||||
|
when choosing to download the codebook indexes prepared by `icefall`_.
|
||||||
|
Of course, you can extract and compute the codebook indexes by yourself. This
|
||||||
|
will require you to download a HuBERT-XL model, and it can take a while for
|
||||||
|
the extraction of codebook indexes.
|
||||||
|
|
||||||
|
|
||||||
|
As usual, you can control the stages you want to run by specifying the following
|
||||||
|
two options:
|
||||||
|
|
||||||
|
- ``--stage``
|
||||||
|
- ``--stop-stage``
|
||||||
|
|
||||||
|
For example,
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ cd egs/librispeech/ASR
|
||||||
|
$ ./distillation_with_hubert.sh --stage 0 --stop-stage 0 # run only stage 0
|
||||||
|
$ ./distillation_with_hubert.sh --stage 2 --stop-stage 4 # run from stage 2 to stage 4
|
||||||
|
|
||||||
|
Here are a few options in `./distillation_with_hubert.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/distillation_with_hubert.sh>`_
|
||||||
|
you need to know before you proceed.
|
||||||
|
|
||||||
|
- ``--full_libri`` If True, use full 960h data. Otherwise only ``train-clean-100`` will be used
|
||||||
|
- ``--use_extracted_codebook`` If True, the first two stages will be skipped and the codebook
|
||||||
|
indexes uploaded by us will be downloaded.
|
||||||
|
|
||||||
|
Since we are using the pre-computed codebook indexes, we set
|
||||||
|
``use_extracted_codebook=True``. If you want to do full `LibriSpeech`_
|
||||||
|
experiments, please set ``full_libri=True``.
|
||||||
|
|
||||||
|
The following command downloads the pre-computed codebook indexes
|
||||||
|
and prepares MVQ-augmented training manifests.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ ./distillation_with_hubert.sh --stage 2 --stop-stage 2 # run only stage 2
|
||||||
|
|
||||||
|
Please see the
|
||||||
|
following screenshot for the output of an example execution.
|
||||||
|
|
||||||
|
.. figure:: ./images/distillation_codebook.png
|
||||||
|
:width: 800
|
||||||
|
:alt: Downloading codebook indexes and preparing training manifest.
|
||||||
|
:align: center
|
||||||
|
|
||||||
|
Downloading codebook indexes and preparing training manifest.
|
||||||
|
|
||||||
|
.. hint::
|
||||||
|
|
||||||
|
The codebook indexes we prepared for you in this tutorial
|
||||||
|
are extracted from the 36-th layer of a fine-tuned HuBERT-XL model
|
||||||
|
with 8 codebooks. If you want to try other configurations, please
|
||||||
|
set ``use_extracted_codebook=False`` and set ``embedding_layer`` and
|
||||||
|
``num_codebooks`` by yourself.
|
||||||
|
|
||||||
|
Now, you should see the following files under the directory ``./data/vq_fbank_layer36_cb8``.
|
||||||
|
|
||||||
|
.. figure:: ./images/distillation_directory.png
|
||||||
|
:width: 800
|
||||||
|
:alt: MVQ-augmented training manifests
|
||||||
|
:align: center
|
||||||
|
|
||||||
|
MVQ-augmented training manifests.
|
||||||
|
|
||||||
|
Voilà! You are ready to perform knowledge distillation training now!
|
||||||
|
|
||||||
|
Training
|
||||||
|
--------
|
||||||
|
|
||||||
|
To perform training, please run stage 3 by executing the following command.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
$ ./distillation_with_hubert.sh --stage 3 --stop-stage 3 # run MVQ training
|
||||||
|
|
||||||
|
Here is the code snippet for training:
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
WORLD_SIZE=$(echo ${CUDA_VISIBLE_DEVICES} | awk '{n=split($1, _, ","); print n}')
|
||||||
|
|
||||||
|
./pruned_transducer_stateless6/train.py \
|
||||||
|
--manifest-dir ./data/vq_fbank_layer36_cb8 \
|
||||||
|
--master-port 12359 \
|
||||||
|
--full-libri $full_libri \
|
||||||
|
--spec-aug-time-warp-factor -1 \
|
||||||
|
--max-duration 300 \
|
||||||
|
--world-size ${WORLD_SIZE} \
|
||||||
|
--num-epochs 30 \
|
||||||
|
--exp-dir $exp_dir \
|
||||||
|
--enable-distillation True \
|
||||||
|
--codebook-loss-scale 0.01
|
||||||
|
|
||||||
|
There are a few training arguments in the above
|
||||||
|
training commands that should be paid attention to.
|
||||||
|
|
||||||
|
- ``--enable-distillation`` If True, knowledge distillation training is enabled.
|
||||||
|
- ``--codebook-loss-scale`` The scale of the knowledge distillation loss.
|
||||||
|
- ``--manifest-dir`` The path to the MVQ-augmented manifest.
|
||||||
|
|
||||||
|
|
||||||
|
Decoding
|
||||||
|
--------
|
||||||
|
|
||||||
|
After training has finished, you can test the performance using
|
||||||
|
the following command.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
export CUDA_VISIBLE_DEVICES=0
|
||||||
|
./pruned_transducer_stateless6/decode.py \
|
||||||
|
--decoding-method "modified_beam_search" \
|
||||||
|
--epoch 30 \
|
||||||
|
--avg 10 \
|
||||||
|
--max-duration 200 \
|
||||||
|
--exp-dir $exp_dir \
|
||||||
|
--enable-distillation True
|
||||||
|
|
||||||
|
You should get similar results as `here <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS-100hours.md#distillation-with-hubert>`_.
|
||||||
|
|
||||||
|
That's all! Feel free to experiment with your own setups and report your results.
|
||||||
|
If you encounter any problems during training, please open up an issue `here <https://github.com/k2-fsa/icefall/issues>`_.
|
||||||