mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
* init files * add ctc as auxiliary loss and ctc_decode.py * tuning the scalar of HLG score for 1best, nbest and nbest-oracle * rename to pruned_transducer_stateless7_ctc * fix doc * fix bug, recover the hlg scores * modify ctc_decode.py, move out the hlg scale * fix hlg_scale * add export.py and pretrained.py, and so on * upload files, update README.md and RESULTS.md * add CI test * update .gitignore * create symlinks * Add Blank Skip to Zipformer+CTC * Add warmup to blank skip * Add warmup to blank skip * Add __init__.py * Add parameters_names to Adam * Add warmup to blank skip * Modify frame_reducer * Modify frame_reducer * Add Blank Skip to decode. * Add ctc_decode.py * Add blank skip to Zipformer+CTC * process conflict * process conflict * modify ctc_guild_decode_bk.py * modify Lconv * produce the conflict * Add export.py * finish export * fix for running black * Add ci test * Add ci-test * chmod * chmod * fix bug for ci-test * fix bug for ci-test * fix bug for ci-test * rename the dirname * rename the dirname * change dirname * change dirname * fix notes * add pretrained.py * add pretrained.py * add pretrained.py * add pretrained.py * add pretrained.py * add pretrained.py * fix * fix * fix * finished * add the Copyright info and notes Co-authored-by: Zengwei Yao <yaozengwei@outlook.com> Co-authored-by: yifanyang <yifanyeung@yifanyangs-MacBook-Pro.local>
151 lines
4.6 KiB
Bash
Executable File
151 lines
4.6 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Downloads a pre-trained model and runs the export, pretrained and decode
# scripts of the pruned_transducer_stateless7_ctc recipe against it.
#
# Reads (optional) environment variables:
#   GITHUB_EVENT_NAME, GITHUB_EVENT_LABEL_NAME

# Abort on the first failing command. `-u` is intentionally not enabled:
# the GITHUB_* variables read later in this script may be unset.
set -e

#######################################
# Print a timestamped message tagged with the caller's location.
# This function is from espnet.
# Arguments: the message words; they are joined with spaces via "$*".
# Outputs:   "<date> (<file>:<line>:<function>) <message>" on stdout.
#######################################
log() {
  # Basename of the file that called log().
  local fname=${BASH_SOURCE[1]##*/}
  # `echo -e` is kept so backslash escapes inside the message still expand.
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# All relative paths below are resolved from the recipe directory.
cd egs/librispeech/ASR

repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01

log "Downloading pre-trained model from $repo_url"
# GIT_LFS_SKIP_SMUDGE=1 is set for the clone; the LFS files that are needed
# are pulled explicitly with `git lfs pull` afterwards.
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
# The clone directory is named after the last component of the URL.
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
soxi "$repo"/test_wavs/*.wav
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
# Fetch the LFS-tracked files one at a time. The --include patterns are
# written relative to the repository root even though we are inside exp/.
for lfs_file in \
  data/lang_bpe_500/HLG.pt \
  data/lang_bpe_500/L.pt \
  data/lang_bpe_500/LG.pt \
  data/lang_bpe_500/Linv.pt \
  data/lang_bpe_500/bpe.model \
  data/lm/G_4_gram.pt \
  exp/cpu_jit.pt \
  exp/pretrained.pt; do
  git lfs pull --include "$lfs_file"
done
# export.py is later run with `--epoch 99`; expose the checkpoint under the
# matching epoch-99.pt name.
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd

log "Export to torchscript model"
# Uses the epoch-99.pt link in $repo/exp (--epoch 99 --avg 1) and requests
# a TorchScript export (--jit 1).
./pruned_transducer_stateless7_ctc/export.py \
  --exp-dir "$repo/exp" \
  --use-averaged-model false \
  --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
  --epoch 99 \
  --avg 1 \
  --jit 1

ls -lh "$repo"/exp/*.pt

log "Decode with models exported by torch.jit.script()"

# Run the TorchScript model on the three bundled test utterances.
./pruned_transducer_stateless7_ctc/jit_pretrained.py \
  --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
  --nn-model-filename "$repo/exp/cpu_jit.pt" \
  "$repo/test_wavs/1089-134686-0001.wav" \
  "$repo/test_wavs/1221-135766-0001.wav" \
  "$repo/test_wavs/1221-135766-0002.wav"

# CTC decoding of the TorchScript model, once per decoding method.
for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
    --model-filename "$repo/exp/cpu_jit.pt" \
    --words-file "$repo/data/lang_bpe_500/words.txt" \
    --HLG "$repo/data/lang_bpe_500/HLG.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --G "$repo/data/lm/G_4_gram.pt" \
    --method "$m" \
    --sample-rate 16000 \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done

# Greedy search on the pre-trained checkpoint for each --max-sym-per-frame
# value.
for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"

  ./pruned_transducer_stateless7_ctc/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame "$sym" \
    --checkpoint "$repo/exp/pretrained.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done

# Run each beam-search variant on the pre-trained checkpoint with
# --beam-size 4.
for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"

  ./pruned_transducer_stateless7_ctc/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done

# CTC decoding of the pre-trained checkpoint, once per decoding method.
for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --words-file "$repo/data/lang_bpe_500/words.txt" \
    --HLG "$repo/data/lang_bpe_500/HLG.pt" \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --G "$repo/data/lm/G_4_gram.pt" \
    --method "$m" \
    --sample-rate 16000 \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"
done

# `:-` keeps these expansions valid even when the variables are unset.
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME:-}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME:-}"

# Decode test-clean and test-other only when GITHUB_EVENT_NAME is "schedule"
# or GITHUB_EVENT_LABEL_NAME is "run-decode".
if [[ "${GITHUB_EVENT_NAME:-}" == "schedule" || "${GITHUB_EVENT_LABEL_NAME:-}" == "run-decode" ]]; then
  mkdir -p pruned_transducer_stateless7_ctc/exp
  # The decode scripts below are run with `--epoch 999`; expose the
  # checkpoint under the matching epoch-999.pt name.
  ln -s "$PWD/$repo/exp/pretrained.pt" pruned_transducer_stateless7_ctc/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh pruned_transducer_stateless7_ctc/exp

  log "Decoding test-clean and test-other"

  # use a small value for decoding with CPU
  max_duration=100

  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"

    ./pruned_transducer_stateless7_ctc/decode.py \
      --decoding-method "$method" \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration "$max_duration" \
      --exp-dir pruned_transducer_stateless7_ctc/exp
  done

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc/ctc_decode.py \
      --epoch 999 \
      --avg 1 \
      --exp-dir ./pruned_transducer_stateless7_ctc/exp \
      --max-duration "$max_duration" \
      --use-averaged-model 0 \
      --decoding-method "$m" \
      --hlg-scale 0.6 \
      --lm-dir data/lm
  done

  # Remove the checkpoint link created above.
  rm pruned_transducer_stateless7_ctc/exp/*.pt
fi