#!/usr/bin/env bash

set -ex

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
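
# Example (added comment): calling `log "download finished"` from prepare_data
# prints a timestamped line such as
#   2024-01-01 00:00:00 (<script name>:<line>:prepare_data) download finished
# using BASH_SOURCE, BASH_LINENO, and FUNCNAME to identify the caller.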

cd egs/librispeech/ASR

function prepare_data() {
  # We don't download the LM file from the official site since it is so large
  # that it would cause an OOM error in CI later.
  mkdir -p download/lm
  pushd download/lm
  wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lm-norm.txt.gz
  wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lexicon.txt
  wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-vocab.txt
  ls -lh
  gunzip librispeech-lm-norm.txt.gz

  ls -lh
  popd

  pushd download/
  wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2
  tar xf LibriSpeech.tar.bz2
  rm LibriSpeech.tar.bz2

  cd LibriSpeech
  # Fake the larger training subsets with symlinks to train-clean-100 so that
  # later stages can find them.
  ln -s train-clean-100 train-clean-360
  ln -s train-clean-100 train-other-500
  popd

  mkdir -p data/manifests

  lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests
  ls -lh data/manifests

  ./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False
  ls -lh data/fbank

  ./prepare.sh --stage 5 --stop-stage 6
}
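
# Optional sanity check (added; not invoked by this script): verify that the
# expected fbank cuts exist before running the tests below. The file names are
# an assumption based on the usual lhotse/icefall naming convention.
function check_prepared_data() {
  for part in dev-clean dev-other test-clean test-other train-clean-100; do
    if [ ! -f data/fbank/librispeech_cuts_${part}.jsonl.gz ]; then
      log "Missing data/fbank/librispeech_cuts_${part}.jsonl.gz"
    fi
  done
}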

function run_diagnostics() {
  ./zipformer/train.py \
    --world-size 1 \
    --num-epochs 1 \
    --start-epoch 1 \
    --use-fp16 0 \
    --exp-dir zipformer/exp-small \
    --causal 0 \
    --num-encoder-layers 1,1,1,1,1,1 \
    --feedforward-dim 64,96,96,96,96,96 \
    --encoder-dim 32,64,64,64,64,64 \
    --encoder-unmasked-dim 32,32,32,32,32,32 \
    --base-lr 0.04 \
    --full-libri 0 \
    --enable-musan 0 \
    --max-duration 30 \
    --print-diagnostics 1
}
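
# Note (added comment): --print-diagnostics 1 is expected to make train.py
# collect and print per-module tensor statistics on a few batches instead of
# running a full training job; the tiny model dimensions above keep this cheap
# enough for CI.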

function test_pruned_transducer_stateless_2022_03_12() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./pruned_transducer_stateless/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}
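
# Illustrative only (added; not used by this script): every test function below
# follows the same pattern of cloning a Hugging Face model repo and decoding a
# few LibriSpeech test wavs. A shared helper could look like this; the function
# name is hypothetical.
function clone_model_repo() {
  local url=$1
  GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
  # Print the local directory name so callers can do: repo=$(clone_model_repo $url)
  basename "$url"
}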

function test_pruned_transducer_stateless2_2022_04_29() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless2-2022-04-29

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  pushd $repo
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/pretrained-epoch-38-avg-10.pt"
  popd

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  ln -s pretrained-epoch-38-avg-10.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless3_2022_04_29() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-04-29

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  pushd $repo
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/pretrained-epoch-25-avg-6.pt"
  popd

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  ln -s pretrained-epoch-25-avg-6.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless3/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless5_2022_05_13() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  ln -s pretrained-epoch-39-avg-7.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless5/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --num-encoder-layers 18 \
      --dim-feedforward 2048 \
      --nhead 8 \
      --encoder-dim 512 \
      --decoder-dim 512 \
      --joiner-dim 512 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless5/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --num-encoder-layers 18 \
      --dim-feedforward 2048 \
      --nhead 8 \
      --encoder-dim 512 \
      --decoder-dim 512 \
      --joiner-dim 512 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless7_2022_11_11() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless8_2022_11_14() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless8/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Export to torchscript model"
  ./pruned_transducer_stateless8/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --use-averaged-model false \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless8/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless8/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless8/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless7_ctc_2022_12_01() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lm/G_4_gram.pt"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_ctc/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_ctc/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
      --model-filename $repo/exp/cpu_jit.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_ctc/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_ctc/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
      --checkpoint $repo/exp/pretrained.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}
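
# Note (added comment): for the *_ctc decoding scripts above, "ctc-decoding"
# decodes with the CTC topology and BPE tokens only, while "1best" additionally
# uses the HLG graph (and words.txt) for lexicon- and LM-constrained decoding.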

function test_zipformer_mmi_2022_12_08() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/3gram.pt"
  git lfs pull --include "data/lang_bpe_500/4gram.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer_mmi/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer_mmi/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    --lang-dir $repo/data/lang_bpe_500 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
    log "$method"

    ./zipformer_mmi/pretrained.py \
      --method $method \
      --checkpoint $repo/exp/pretrained.pt \
      --lang-dir $repo/data/lang_bpe_500 \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless7_streaming_2022_12_29() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  git lfs pull --include "exp/encoder_jit_trace.pt"
  git lfs pull --include "exp/decoder_jit_trace.pt"
  git lfs pull --include "exp/joiner_jit_trace.pt"
  cd exp
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_streaming/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --decode-chunk-len 32 \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_streaming/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    --decode-chunk-len 32 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Export to torchscript model by torch.jit.trace()"
  ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --decode-chunk-len 32 \
    --epoch 99 \
    --avg 1

  log "Decode with models exported by torch.jit.trace()"

  ./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
    --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
    --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
    --decode-chunk-len 32 \
    $repo/test_wavs/1089-134686-0001.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_streaming/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --decode-chunk-len 32 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_streaming/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --decode-chunk-len 32 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless7_ctc_bs_2023_01_29() {
  repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_ctc_bs/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
      --model-filename $repo/exp/cpu_jit.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
      --checkpoint $repo/exp/pretrained.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_conformer_ctc3_2022_11_27() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lm/G_4_gram.pt"
  git lfs pull --include "exp/jit_trace.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Decode with models exported by torch.jit.trace()"

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/jit_pretrained.py \
      --model-filename $repo/exp/jit_trace.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  log "Export to torchscript model"

  ./conformer_ctc3/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --jit-trace 1 \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.trace()"

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/jit_pretrained.py \
      --model-filename $repo/exp/jit_trace.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/pretrained.py \
      --checkpoint $repo/exp/pretrained.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_lstm_transducer_stateless2_2022_09_03() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)
  abs_repo=$(realpath $repo)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
  ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
  popd

  log "Test exporting with torch.jit.trace()"

  ./lstm_transducer_stateless2/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0 \
    --jit-trace 1

  log "Decode with models exported by torch.jit.trace()"

  ./lstm_transducer_stateless2/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
    --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
    --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./lstm_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./lstm_transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_pruned_transducer_stateless3_2022_05_13() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
  ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless3/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ./pruned_transducer_stateless3/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit-trace 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.trace()"

  ./pruned_transducer_stateless3/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
    --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
    --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless3/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_script.pt \
    --decoder-model-filename $repo/exp/decoder_jit_script.pt \
    --joiner-model-filename $repo/exp/joiner_jit_script.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless3/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}
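
# Note (added comment): in the function above, export.py --jit 1 produces
# torch.jit.script() exports (encoder_jit_script.pt, decoder_jit_script.pt,
# joiner_jit_script.pt), while --jit-trace 1 produces torch.jit.trace() exports
# (encoder_jit_trace.pt, decoder_jit_trace.pt, joiner_jit_trace.pt); both sets
# are decoded with jit_pretrained.py.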

function test_streaming_pruned_transducer_stateless2_20220625() {
  repo_url=https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless2_20220625

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  ln -s pretrained-epoch-24-avg-10.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --simulate-streaming 1 \
      --causal-convolution 1 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --simulate-streaming 1 \
      --causal-convolution 1 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_streaming_zipformer_2023_05_17() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "exp/jit_script_chunk_16_left_128.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --causal 1 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer/jit_pretrained_streaming.py \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \
    $repo/test_wavs/1089-134686-0001.wav

  for method in greedy_search modified_beam_search fast_beam_search; do
    log "$method"

    ./zipformer/pretrained.py \
      --causal 1 \
      --chunk-size 16 \
      --left-context-frames 128 \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_zipformer_2023_05_18() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "exp/jit_script.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer/jit_pretrained.py \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --nn-model-filename $repo/exp/jit_script.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for method in greedy_search modified_beam_search fast_beam_search; do
    log "$method"

    ./zipformer/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_transducer_stateless2_torchaudio_2022_04_19() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_zipformer_transducer_ctc_2023_06_13() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lm/G_4_gram.pt"
  git lfs pull --include "exp/jit_script.pt"
  git lfs pull --include "exp/pretrained.pt"
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir $repo/exp \
    --use-transducer 1 \
    --use-ctc 1 \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  for method in ctc-decoding 1best; do
    ./zipformer/jit_pretrained_ctc.py \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --model-filename $repo/exp/jit_script.pt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --G $repo/data/lm/G_4_gram.pt \
      --method $method \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in ctc-decoding 1best; do
    log "$method"

    ./zipformer/pretrained_ctc.py \
      --use-transducer 1 \
      --use-ctc 1 \
      --method $method \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --G $repo/data/lm/G_4_gram.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_transducer_stateless_multi_datasets_bpe_500_2022_03_01() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_transducer_stateless_bpe_500_2022_02_07() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./transducer_stateless/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}

function test_zipformer_ctc_en_2023_10_02() {
  repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  log "CTC greedy search"

  ./zipformer/onnx_pretrained_ctc.py \
    --nn-model $repo/model.onnx \
    --tokens $repo/tokens.txt \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav

  log "CTC H decoding"

  ./zipformer/onnx_pretrained_ctc_H.py \
    --nn-model $repo/model.onnx \
    --tokens $repo/tokens.txt \
    --H $repo/H.fst \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav

  log "CTC HL decoding"

  ./zipformer/onnx_pretrained_ctc_HL.py \
    --nn-model $repo/model.onnx \
    --words $repo/words.txt \
    --HL $repo/HL.fst \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav

  log "CTC HLG decoding"

  ./zipformer/onnx_pretrained_ctc_HLG.py \
    --nn-model $repo/model.onnx \
    --words $repo/words.txt \
    --HLG $repo/HLG.fst \
    $repo/test_wavs/0.wav \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav

  rm -rf $repo
}
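
# Note (added comment): in the FST decoding tests above, H is the token-level
# FST alone, HL additionally composes in the lexicon, and HLG further composes
# in an n-gram word-level language model.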

function test_conformer_ctc_jit_bpe_500_2021_11_09() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  pushd $repo
  git lfs pull --include "exp/pretrained.pt"
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/L_disambig.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/lexicon.txt"
  git lfs pull --include "data/lang_bpe_500/lexicon_disambig.txt"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "data/lang_bpe_500/words.txt"
  git lfs pull --include "data/lm/G_3_gram.fst.txt"
  popd

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  log "CTC decoding"

  ./conformer_ctc/pretrained.py \
    --method ctc-decoding \
    --num-classes 500 \
    --checkpoint $repo/exp/pretrained.pt \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "HLG decoding"

  ./conformer_ctc/pretrained.py \
    --method 1best \
    --num-classes 500 \
    --checkpoint $repo/exp/pretrained.pt \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --words-file $repo/data/lang_bpe_500/words.txt \
    --HLG $repo/data/lang_bpe_500/HLG.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "CTC decoding on CPU with kaldi decoders using OpenFst"

  log "Exporting model with torchscript"

  pushd $repo/exp
  ln -s pretrained.pt epoch-99.pt
  popd

  ./conformer_ctc/export.py \
    --epoch 99 \
    --avg 1 \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --jit 1

  ls -lh $repo/exp

  log "Generating H.fst, HL.fst"

  ./local/prepare_lang_fst.py --lang-dir $repo/data/lang_bpe_500 --ngram-G $repo/data/lm/G_3_gram.fst.txt

  ls -lh $repo/data/lang_bpe_500

  log "Decoding with H on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_H.py \
    --nn-model $repo/exp/cpu_jit.pt \
    --H $repo/data/lang_bpe_500/H.fst \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Decoding with HL on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_HL.py \
    --nn-model $repo/exp/cpu_jit.pt \
    --HL $repo/data/lang_bpe_500/HL.fst \
    --words $repo/data/lang_bpe_500/words.txt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Decoding with HLG on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
    --nn-model $repo/exp/cpu_jit.pt \
    --HLG $repo/data/lang_bpe_500/HLG.fst \
    --words $repo/data/lang_bpe_500/words.txt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  rm -rf $repo
}

function test_transducer_bpe_500_2021_12_23() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-bpe-500-2021-12-23

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  log "Beam search decoding"

  ./transducer/pretrained.py \
    --method beam_search \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  rm -rf $repo
}

prepare_data
run_diagnostics
test_pruned_transducer_stateless_2022_03_12
test_pruned_transducer_stateless2_2022_04_29
test_pruned_transducer_stateless3_2022_04_29
test_pruned_transducer_stateless5_2022_05_13
test_pruned_transducer_stateless7_2022_11_11
test_pruned_transducer_stateless8_2022_11_14
test_pruned_transducer_stateless7_ctc_2022_12_01
test_zipformer_mmi_2022_12_08
test_pruned_transducer_stateless7_streaming_2022_12_29
test_pruned_transducer_stateless7_ctc_bs_2023_01_29
test_conformer_ctc3_2022_11_27
test_lstm_transducer_stateless2_2022_09_03
test_pruned_transducer_stateless3_2022_05_13
test_streaming_pruned_transducer_stateless2_20220625
test_streaming_zipformer_2023_05_17
test_zipformer_2023_05_18
test_transducer_stateless2_torchaudio_2022_04_19
test_zipformer_transducer_ctc_2023_06_13
test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21
test_transducer_stateless_multi_datasets_bpe_500_2022_03_01
test_transducer_stateless_bpe_500_2022_02_07
test_zipformer_ctc_en_2023_10_02
# test_conformer_ctc_jit_bpe_500_2021_11_09  # fails for torch != 1.13.x and torch != 2.0.x
test_transducer_bpe_500_2021_12_23