Compare commits

..

No commits in common. "master" and "v1.0" have entirely different histories.
master ... v1.0

3118 changed files with 3461 additions and 630489 deletions

30
.flake8
View File

@ -1,35 +1,11 @@
[flake8]
show-source=true
statistics=true
max-line-length = 88
max-line-length = 80
per-file-ignores =
# line too long
icefall/diagnostics.py: E501,
egs/*/ASR/*/conformer.py: E501,
egs/*/ASR/pruned_transducer_stateless*/*.py: E501,
egs/*/ASR/*/optim.py: E501,
egs/*/ASR/*/scaling.py: E501,
egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
egs/librispeech/ASR/conformer_ctc*/*py: E501,
egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
egs/librispeech/ASR/zipformer/*.py: E501, E203
egs/librispeech/ASR/RESULTS.md: E999,
egs/ljspeech/TTS/vits/*.py: E501, E203
# invalid escape sequence (caused by TeX formulas), W605
icefall/utils.py: E501, W605
egs/librispeech/ASR/conformer_ctc/conformer.py: E501,
exclude =
.git,
**/data/**,
icefall/shared/make_kn_lm.py,
icefall/__init__.py
icefall/ctc/__init__.py
ignore =
# E203 white space before ":"
E203,
# W503 line break before binary operator
W503,
# E226 missing whitespace around arithmetic operator
E226,
**/data/**

View File

@ -1,3 +0,0 @@
# Migrate to 88 characters per line (see: https://github.com/lhotse-speech/lhotse/issues/890)
107df3b115a58f1b68a6458c3f94a130004be34c
d31db010371a4128856480382876acdc0d1739ed

View File

@ -1 +0,0 @@
piper_phonemize.html

View File

@ -1,343 +0,0 @@
#!/usr/bin/env bash
set -ex
log() {
  # This function is from espnet.
  # Prints "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  local caller_file="${BASH_SOURCE[1]##*/}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
}
cd egs/aishell/ASR
# Clone the aishell-test-dev-manifests repository from Hugging Face and link
# its data/ directory into the current directory.
function download_test_dev_manifests() {
  local fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests

  git lfs install

  log "Downloading pre-commputed fbank from $fbank_url"
  # Clone the URL that was just logged instead of repeating the literal.
  git clone "$fbank_url"
  # Quoted so that a working directory containing spaces still links correctly.
  ln -s "$PWD/aishell-test-dev-manifests/data" .
}
# Download the pruned-transducer-stateless3 AISHELL model, run pretrained.py
# with every supported search method on three test wavs and, for scheduled
# runs or PRs labelled "run-decode", also run decode.py with a small
# --max-duration. The cloned repository is removed at the end.
function test_transducer_stateless3_2022_06_20() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20
  local repo sym method max_duration
  local -a wavs

  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  # pretrained.py is pointed at exp/pretrained.pt, so alias the released
  # checkpoint under that name.
  pushd "$repo/exp"
  ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt pretrained.pt
  popd

  log "test greedy_search with pretrained.py"
  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"
    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done

  log "test beam search with pretrained.py"
  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"
    ./pruned_transducer_stateless3/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done

  echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
  echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
  if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
    mkdir -p pruned_transducer_stateless3/exp
    # decode.py is run with --epoch 999, so expose the checkpoint as epoch-999.pt.
    ln -s "$PWD/$repo/exp/pretrained.pt" pruned_transducer_stateless3/exp/epoch-999.pt
    ln -s "$PWD/$repo/data/lang_char" data/
    ls -lh data
    ls -lh pruned_transducer_stateless3/exp

    log "Decoding test and dev"
    # use a small value for decoding with CPU
    max_duration=100
    for method in greedy_search fast_beam_search modified_beam_search; do
      log "Decoding with $method"
      ./pruned_transducer_stateless3/decode.py \
        --decoding-method "$method" \
        --epoch 999 \
        --avg 1 \
        --max-duration "$max_duration" \
        --exp-dir pruned_transducer_stateless3/exp
    done
    rm pruned_transducer_stateless3/exp/*.pt
  fi

  rm -rf "$repo"
}
# Download the large AISHELL zipformer model and run zipformer/pretrained.py
# with each search method on three test wavs, passing the large-model
# encoder configuration explicitly. The clone is removed afterwards.
function test_zipformer_large_2023_10_24() {
  local repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-large-2023-10-24/
  local repo method
  local -a wavs

  log "CI testing large model"
  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  # basename ignores the trailing slash of repo_url
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for method in modified_beam_search greedy_search fast_beam_search; do
    log "$method"
    ./zipformer/pretrained.py \
      --method "$method" \
      --context-size 1 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_char/tokens.txt" \
      --num-encoder-layers 2,2,4,5,4,2 \
      --feedforward-dim 512,768,1536,2048,1536,768 \
      --encoder-dim 192,256,512,768,512,256 \
      --encoder-unmasked-dim 192,192,256,320,256,192 \
      "${wavs[@]}"
  done

  rm -rf "$repo"
}
# Download the AISHELL zipformer model and run zipformer/pretrained.py with
# each search method on three test wavs. The clone is removed afterwards.
function test_zipformer_2023_10_24() {
  local repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-2023-10-24/
  local repo method
  local -a wavs

  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  # basename ignores the trailing slash of repo_url
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for method in modified_beam_search greedy_search fast_beam_search; do
    log "$method"
    ./zipformer/pretrained.py \
      --method "$method" \
      --context-size 1 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_char/tokens.txt" \
      "${wavs[@]}"
  done

  rm -rf "$repo"
}
# Download the small AISHELL zipformer model and run zipformer/pretrained.py
# with each search method on three test wavs, passing the small-model
# encoder configuration explicitly. The clone is removed afterwards.
function test_zipformer_small_2023_10_24() {
  local repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-small-2023-10-24/
  local repo method
  local -a wavs

  log "CI testing small model"
  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  # basename ignores the trailing slash of repo_url
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for method in modified_beam_search greedy_search fast_beam_search; do
    log "$method"
    ./zipformer/pretrained.py \
      --method "$method" \
      --context-size 1 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_char/tokens.txt" \
      --num-encoder-layers 2,2,2,2,2,2 \
      --feedforward-dim 512,768,768,768,768,768 \
      --encoder-dim 192,256,256,256,256,256 \
      --encoder-unmasked-dim 192,192,192,192,192,192 \
      "${wavs[@]}"
  done

  rm -rf "$repo"
}
# Download the transducer-stateless-modified AISHELL model and run
# transducer_stateless_modified/pretrained.py with greedy search
# (--max-sym-per-frame 1..3) and two beam-search methods on three test wavs.
# The clone is removed afterwards.
function test_transducer_stateless_modified_2022_03_01() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01
  local repo sym method
  local -a wavs

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"
    ./transducer_stateless_modified/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search; do
    log "$method"
    ./transducer_stateless_modified/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done

  rm -rf "$repo"
}
# Download the transducer-stateless-modified-2 AISHELL model and run
# transducer_stateless_modified-2/pretrained.py with greedy search
# (--max-sym-per-frame 1..3) and two beam-search methods on three test wavs.
# The clone is removed afterwards.
function test_transducer_stateless_modified_2_2022_03_01() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01
  local repo sym method
  local -a wavs

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"
    ./transducer_stateless_modified-2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search; do
    log "$method"
    ./transducer_stateless_modified-2/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done

  rm -rf "$repo"
}
# Download the AISHELL conformer-CTC model (pulling only the LFS files that are
# needed), export it with --jit 1 and decode three test wavs with the H, HL
# and HLG graphs. The clone is removed afterwards.
function test_conformer_ctc() {
  local repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
  local repo lfs_file
  local -a wavs

  log "Downloading pre-trained model from $repo_url"
  # Skip the automatic LFS download; the required files are pulled one by one.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/0.wav"
    "$repo/test_wavs/1.wav"
    "$repo/test_wavs/2.wav"
  )

  pushd "$repo"
  for lfs_file in \
    exp/pretrained.pt \
    data/lang_char/H.fst \
    data/lang_char/HL.fst \
    data/lang_char/HLG.fst; do
    git lfs pull --include "$lfs_file"
  done
  popd

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  log "CTC decoding"

  log "Exporting model with torchscript"

  # export.py is invoked with --epoch 99, so alias the checkpoint accordingly.
  pushd "$repo/exp"
  ln -s pretrained.pt epoch-99.pt
  popd

  ./conformer_ctc/export.py \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_char/tokens.txt" \
    --jit 1

  ls -lh "$repo/exp"

  ls -lh "$repo/data/lang_char"

  log "Decoding with H on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_H.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --H "$repo/data/lang_char/H.fst" \
    --tokens "$repo/data/lang_char/tokens.txt" \
    "${wavs[@]}"

  log "Decoding with HL on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_HL.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --HL "$repo/data/lang_char/HL.fst" \
    --words "$repo/data/lang_char/words.txt" \
    "${wavs[@]}"

  log "Decoding with HLG on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --HLG "$repo/data/lang_char/HLG.fst" \
    --words "$repo/data/lang_char/words.txt" \
    "${wavs[@]}"

  rm -rf "$repo"
}
# Fetch the shared manifests first, then run every enabled test in order.
download_test_dev_manifests
for ci_test in \
  test_transducer_stateless3_2022_06_20 \
  test_zipformer_large_2023_10_24 \
  test_zipformer_2023_10_24 \
  test_zipformer_small_2023_10_24 \
  test_transducer_stateless_modified_2022_03_01 \
  test_transducer_stateless_modified_2_2022_03_01; do
  "$ci_test"
done
# test_conformer_ctc # fails for torch 1.13.x and torch 2.0.x

View File

@ -1,94 +0,0 @@
#!/usr/bin/env bash
set -ex
python3 -m pip install onnxoptimizer onnxsim
log() {
  # This function is from espnet.
  # Prints "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  local caller_file="${BASH_SOURCE[1]##*/}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
}
cd egs/audioset/AT
# Download the AudioSet audio-tagging zipformer model, then exercise
# pretrained.py, the jit export, the onnx export and the exported models on
# four test wavs. Finally copy the onnx models, label data and test wavs to
# /icefall/model-onnx.
function test_pretrained() {
  local repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
  local repo m
  local dst=/icefall/model-onnx
  local -a wavs

  repo=$(basename "$repo_url")
  wavs=(
    "$repo/test_wavs/1.wav"
    "$repo/test_wavs/2.wav"
    "$repo/test_wavs/3.wav"
    "$repo/test_wavs/4.wav"
  )

  # Skip the automatic LFS download; only pretrained.pt is pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  pushd "$repo/exp"
  git lfs pull --include pretrained.pt
  # The export scripts are run with --epoch 99, so alias the checkpoint.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd

  log "test pretrained.pt"

  python3 zipformer/pretrained.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --label-dict "$repo/data/class_labels_indices.csv" \
    "${wavs[@]}"

  log "test jit export"
  ls -lh "$repo/exp/"
  python3 zipformer/export.py \
    --exp-dir "$repo/exp" \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0 \
    --jit 1
  ls -lh "$repo/exp/"

  log "test jit models"
  python3 zipformer/jit_pretrained.py \
    --nn-model-filename "$repo/exp/jit_script.pt" \
    --label-dict "$repo/data/class_labels_indices.csv" \
    "${wavs[@]}"

  log "test onnx export"
  ls -lh "$repo/exp/"
  python3 zipformer/export-onnx.py \
    --exp-dir "$repo/exp" \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0

  ls -lh "$repo/exp/"

  pushd "$repo/exp/"
  mv model-epoch-99-avg-1.onnx model.onnx
  mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
  popd

  ls -lh "$repo/exp/"

  log "test onnx models"
  for m in model.onnx model.int8.onnx; do
    log "$m"
    # Use the loop variable so that the int8 model is tested as well; the
    # previous code passed model.onnx on both iterations.
    python3 zipformer/onnx_pretrained.py \
      --model-filename "$repo/exp/$m" \
      --label-dict "$repo/data/class_labels_indices.csv" \
      "${wavs[@]}"
  done

  log "prepare data for uploading to huggingface"
  mkdir -p "$dst"
  cp -v "$repo"/exp/*.onnx "$dst/"
  cp -v "$repo"/data/* "$dst/"
  cp -av "$repo/test_wavs" "$dst"

  ls -lh "$dst"
  ls -lh "$dst/test_wavs"
}
test_pretrained

View File

@ -1,167 +0,0 @@
#!/usr/bin/env bash
set -ex
apt-get update
apt-get install -y sox
python3 -m pip install numba conformer==0.3.2 diffusers librosa
python3 -m pip install jieba
log() {
  # This function is from espnet.
  # Prints "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  local caller_file="${BASH_SOURCE[1]##*/}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
}
# Everything below runs from the baker_zh TTS recipe directory.
cd egs/baker_zh/TTS
# Rewrite values in prepare.sh in place: 600 -> 8, "first 100" -> "first 3",
# 500 -> 5. Each sed call also (re)writes prepare.sh.bak.
# NOTE(review): presumably this shrinks the data preparation for CI - confirm
# against prepare.sh.
sed -i.bak s/600/8/g ./prepare.sh
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
sed -i.bak s/500/5/g ./prepare.sh
# Print the resulting changes.
git diff
# Download the BZNSYP sample archive into download/BZNSYP and run prepare.sh.
function prepare_data() {
  local archive=BZNSYP-samples.tar.bz2

  # We have created a subset of the data for testing
  #
  mkdir -p download
  pushd download
  wget -q "https://huggingface.co/csukuangfj/tmp-files/resolve/main/${archive}"
  tar xvf "${archive}"
  mv BZNSYP-samples BZNSYP
  rm "${archive}"
  popd

  ./prepare.sh
  tree .
}
# Patch matcha/train.py (1500 -> 3), show the diff, then run one training
# epoch and list the experiment directory.
function train() {
  local -a train_args=(
    --exp-dir matcha/exp
    --num-epochs 1
    --save-every-n 1
    --num-buckets 2
    --tokens data/tokens.txt
    --max-duration 20
  )

  pushd ./matcha
  sed -i.bak s/1500/3/g ./train.py
  git diff .
  popd

  ./matcha/train.py "${train_args[@]}"

  ls -lh matcha/exp
}
# Download the generator_v2 vocoder, synthesize one sentence with the
# checkpoint of epoch 1, inspect the generated wav and remove both files.
function infer() {
  local text="当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"

  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2

  ./matcha/infer.py \
    --num-buckets 2 \
    --epoch 1 \
    --exp-dir ./matcha/exp \
    --tokens data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json \
    --vocoder ./generator_v2 \
    --input-text "${text}" \
    --output-wav ./generated.wav

  ls -lh *.wav
  soxi ./generated.wav
  rm -v ./generated.wav
  rm -v generator_v2
}
# Download the released epoch-2000 checkpoint and cmvn.json, export the
# acoustic model to onnx, fetch the hifigan onnx vocoders, synthesize one
# sentence per vocoder into /icefall, and package tokens, lexicon, the
# 3-step model, the jieba dict and the *.fst rule files into
# /icefall/matcha-icefall-zh-baker(.tar.bz2).
function export_onnx() {
  local matcha_url=https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main
  local hifigan_onnx_url=https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp
  local hifigan_pt_url=https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan
  local rule_fst_url=https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data
  local text="当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。"
  local d=matcha-icefall-zh-baker
  local v fst

  pushd matcha/exp
  curl -SL -O "$matcha_url/epoch-2000.pt"
  popd

  # Replace the locally generated json files with the released cmvn.json.
  pushd data/fbank
  rm -v *.json
  curl -SL -O "$matcha_url/cmvn.json"
  popd

  ./matcha/export_onnx.py \
    --exp-dir ./matcha/exp \
    --epoch 2000 \
    --tokens ./data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json

  ls -lh *.onnx

  if false; then
    # The CI machine does not have enough memory to run it
    #
    for v in v1 v2 v3; do
      curl -SL -O "$hifigan_pt_url/generator_$v"
    done
    python3 ./matcha/export_onnx_hifigan.py
  else
    for v in v1 v2 v3; do
      curl -SL -O "$hifigan_onnx_url/hifigan_$v.onnx"
    done
  fi

  ls -lh *.onnx

  python3 ./matcha/generate_lexicon.py

  for v in v1 v2 v3; do
    python3 ./matcha/onnx_pretrained.py \
      --acoustic-model ./model-steps-6.onnx \
      --vocoder "./hifigan_$v.onnx" \
      --tokens ./data/tokens.txt \
      --lexicon ./lexicon.txt \
      --input-text "$text" \
      --output-wav "/icefall/generated-matcha-tts-steps-6-$v.wav"
  done

  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
  cp ./model-steps-*.onnx /icefall

  mkdir "$d"
  cp -v data/tokens.txt "$d"
  cp -v lexicon.txt "$d"
  cp model-steps-3.onnx "$d"

  pushd "$d"
  curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
  tar xvf dict.tar.bz2
  rm dict.tar.bz2

  for fst in date.fst number.fst phone.fst; do
    curl -SL -O "$rule_fst_url/$fst"
  done

  # The here-document body and its terminator must stay at column 0.
  cat >README.md <<EOF
# Introduction
This model is trained using the dataset from
https://en.data-baker.com/datasets/freeDatasets/
The dataset contains 10000 Chinese sentences of a native Chinese female speaker,
which is about 12 hours.
**Note**: The dataset is for non-commercial use only.
You can find the training code at
https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
EOF

  ls -lh
  popd

  tar cvjf "$d.tar.bz2" "$d"
  mv "$d.tar.bz2" /icefall
  mv "$d" /icefall
}
# Run the four stages in order, then remove downloaded/generated artifacts
# and restore the tracked files that were patched with sed.
for stage in prepare_data train infer export_onnx; do
  "$stage"
done

rm -rfv generator_v* matcha/exp
git checkout .

View File

@ -1,19 +0,0 @@
#!/usr/bin/env bash
# This script computes fbank features for the test-clean and test-other datasets.
# The computed features are saved to ~/tmp/fbank-libri and are
# cached for later runs.
set -e
export PYTHONPATH="$PWD:$PYTHONPATH"
echo "$PYTHONPATH"
# -p: create ~/tmp when it is missing and do not fail when the feature
# directory already exists (e.g. restored from a cache).
mkdir -p ~/tmp/fbank-libri
cd egs/librispeech/ASR
mkdir -p data
cd data
# Link the feature directory into the recipe unless something named fbank
# is already there.
if [ ! -e fbank ]; then
  ln -s ~/tmp/fbank-libri fbank
fi
cd ..
./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
ls -lh data/fbank/

View File

@ -1,75 +0,0 @@
# Tag of the python base image used by FROM below.
ARG PYTHON_VERSION=3.8
FROM python:${PYTHON_VERSION}
# Versions of the packages installed by the pip step further down.
ARG TORCHAUDIO_VERSION="0.13.0"
ARG TORCH_VERSION="1.13.0"
ARG K2_VERSION="1.24.4.dev20231220"
ARG KALDIFEAT_VERSION="1.25.3.dev20231221"
# Full version strings: the base version plus the "+cpu.torch<TORCH_VERSION>" suffix.
ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}"
ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}"
# Install system packages, then remove the apt caches in the same step.
RUN apt-get update -y && \
apt-get install -qq -y \
cmake \
ffmpeg \
git \
git-lfs \
graphviz \
less \
tree \
vim \
&& \
apt-get clean && \
rm -rf /var/cache/apt/archives /var/lib/apt/lists
# Image metadata, including the exact k2 and kaldifeat versions.
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${_K2_VERSION}
LABEL kaldifeat_version=${_KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
# Install dependencies
# Requirement specifiers containing '<' or '>' are quoted: this RUN is executed
# by a shell, where an unquoted "sentencepiece>=0.1.96" installs an
# unconstrained sentencepiece and redirects pip's output to a file named
# "=0.1.96".
RUN pip install --no-cache-dir \
  torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
  torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
  k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
  \
  git+https://github.com/lhotse-speech/lhotse \
  kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
  conformer==0.3.2 \
  cython \
  diffusers \
  dill \
  espnet_tts_frontend \
  graphviz \
  kaldi-decoder \
  kaldi_native_io \
  kaldialign \
  kaldifst \
  kaldilm \
  librosa \
  "matplotlib<=3.9.4" \
  multi_quantization \
  numba \
  "numpy<2.0" \
  onnxoptimizer \
  onnxsim \
  onnx==1.17.0 \
  onnxmltools \
  onnxruntime==1.17.1 \
  piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
  pypinyin==0.50.0 \
  pytest \
  "sentencepiece>=0.1.96" \
  six \
  tensorboard \
  typeguard
# RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
# cd /workspace/icefall && \
# pip install --no-cache-dir -r requirements.txt
#
# ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
#
# WORKDIR /workspace/icefall

View File

@ -1,140 +0,0 @@
#!/usr/bin/env python3
# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang)
import argparse
import json
def get_args():
    """Parse the command-line options that restrict the generated matrix.

    Returns:
      The parsed namespace with ``min_torch_version``, ``torch_version`` and
      ``python_version``; each is ``None`` when the option is not given.
    """
    parser = argparse.ArgumentParser()
    options = (
        ("--min-torch-version", "torch version"),
        ("--torch-version", "torch version"),
        ("--python-version", "python version"),
    )
    for flag, help_text in options:
        parser.add_argument(flag, help=help_text)
    return parser.parse_args()
def version_gt(a, b):
    """Return True if version ``a`` is strictly greater than version ``b``.

    Only the first two dot-separated components (major and minor) are
    compared; anything after them is ignored.

    Args:
      a: A version string such as "2.1.0" or "3.10".
      b: A version string in the same format.
    Returns:
      True if (major, minor) of ``a`` is greater than that of ``b``.
    Raises:
      ValueError: If a version has fewer than two components or its major or
        minor component is not an integer.
    """
    # Slice before converting so that a non-numeric later component
    # (e.g. "2.1.0+cpu") is ignored instead of raising.
    a_major, a_minor = map(int, a.split(".")[:2])
    b_major, b_minor = map(int, b.split(".")[:2])
    return (a_major, a_minor) > (b_major, b_minor)
def version_ge(a, b):
    """Return True if version ``a`` is greater than or equal to version ``b``.

    Only the first two dot-separated components (major and minor) are
    compared; anything after them is ignored.

    Args:
      a: A version string such as "2.1.0" or "3.10".
      b: A version string in the same format.
    Returns:
      True if (major, minor) of ``a`` is not less than that of ``b``.
    Raises:
      ValueError: If a version has fewer than two components or its major or
        minor component is not an integer.
    """
    # Slice before converting so that a non-numeric later component
    # (e.g. "2.1.0+cpu") is ignored instead of raising.
    a_major, a_minor = map(int, a.split(".")[:2])
    b_major, b_minor = map(int, b.split(".")[:2])
    return (a_major, a_minor) >= (b_major, b_minor)
def get_torchaudio_version(torch_version):
    """Return the torchaudio version paired with ``torch_version``.

    The four torch versions listed below map to a torchaudio version with a
    different number; every other torch version is returned unchanged.
    """
    special_cases = {
        "1.13.0": "0.13.0",
        "1.13.1": "0.13.1",
        "2.0.0": "2.0.1",
        "2.0.1": "2.0.2",
    }
    return special_cases.get(torch_version, torch_version)
def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
    """Build the list of (python, torch) combinations for the build matrix.

    Args:
      min_torch_version: If truthy, torch versions older than this one
        (compared by major.minor) are left out.
      specified_torch_version: If truthy, the only torch version considered.
      specified_python_version: If truthy, the only python version considered.
    Returns:
      A list of dicts with the keys "k2-version", "kaldifeat-version",
      "version", "python-version", "torch-version" and "torchaudio-version",
      ordered by python version first and torch version second.
    """
    k2_version = "1.24.4.dev20250630"
    kaldifeat_version = "1.25.5.dev20250630"
    version = "20250630"

    python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
    torch_version = [
        "1.13.0", "1.13.1",
        "2.0.0", "2.0.1",
        "2.1.0", "2.1.1", "2.1.2",
        "2.2.0", "2.2.1", "2.2.2",
        "2.3.0", "2.3.1",
        "2.4.0",
        "2.4.1",
        "2.5.0",
        "2.5.1",
        "2.6.0", "2.7.0", "2.7.1",
    ]

    if specified_torch_version:
        torch_version = [specified_torch_version]
    if specified_python_version:
        python_version = [specified_python_version]

    def is_skipped(p, t):
        # All comparisons look at major.minor only.
        return bool(
            # torch older than the requested minimum
            (min_torch_version and version_gt(min_torch_version, t))
            # python newer than 3.10 needs torch newer than 2.0
            or (version_gt(p, "3.10") and not version_gt(t, "2.0"))
            # python newer than 3.11 needs torch newer than 2.1
            or (version_gt(p, "3.11") and not version_gt(t, "2.1"))
            # python newer than 3.12 needs torch newer than 2.4
            or (version_gt(p, "3.12") and not version_gt(t, "2.4"))
            # torch newer than 2.4 needs python 3.10 or newer
            or (version_gt(t, "2.4") and version_gt("3.10", p))
        )

    return [
        {
            "k2-version": k2_version,
            "kaldifeat-version": kaldifeat_version,
            "version": version,
            "python-version": p,
            "torch-version": t,
            "torchaudio-version": get_torchaudio_version(t),
        }
        for p in python_version
        for t in torch_version
        if not is_skipped(p, t)
    ]
def main():
    """Print the matrix selected by the command-line options as JSON."""
    options = get_args()
    entries = get_matrix(
        min_torch_version=options.min_torch_version,
        specified_torch_version=options.torch_version,
        specified_python_version=options.python_version,
    )
    payload = {"include": entries}
    print(json.dumps(payload))


if __name__ == "__main__":
    main()

View File

@ -1,17 +0,0 @@
#!/usr/bin/env bash
# Download the pre-computed fbank features for the dev and test datasets
# of GigaSpeech.
#
# After running this script you will find the directory
# `~/tmp/giga-dev-dataset-fbank`.
set -e
dataset=giga-dev-dataset-fbank
mkdir -p ~/tmp
cd ~/tmp
git lfs install
git clone "https://huggingface.co/csukuangfj/${dataset}"
ls -lh "${dataset}/data/fbank"

View File

@ -1,25 +0,0 @@
#!/usr/bin/env bash
# This script downloads the test-clean and test-other datasets
# of LibriSpeech and extracts them into the folder ~/tmp/download,
# which is cached by GitHub actions for later runs.
#
# You will find the directory ~/tmp/download/LibriSpeech after running
# this script.
set -e
# -p also creates ~/tmp when it does not exist yet.
mkdir -p ~/tmp/download
cd egs/librispeech/ASR
ln -s ~/tmp/download .
cd download
# NOTE(review): --no-check-certificate turns off TLS certificate verification
# for these downloads; kept as in the original script.
for subset in test-clean test-other; do
  wget -q --no-check-certificate "https://www.openslr.org/resources/12/${subset}.tar.gz"
  tar xf "${subset}.tar.gz"
  rm "${subset}.tar.gz"
done
pwd
ls -lh
ls -lh LibriSpeech

View File

@ -1,90 +0,0 @@
#!/usr/bin/env python3
def get_v1_2_0_files():
    """Return the sorted download URLs of the piper_phonemize 1.2.0 wheels."""
    prefix = (
        "https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
    )
    # (python tag, abi tag); only cp37 uses a different abi tag.
    tags = [
        ("cp310", "cp310"),
        ("cp311", "cp311"),
        ("cp312", "cp312"),
        ("cp37", "cp37m"),
        ("cp38", "cp38"),
        ("cp39", "cp39"),
    ]
    platforms = [
        "macosx_10_14_x86_64",
        "manylinux_2_17_x86_64.manylinux2014_x86_64",
    ]
    return sorted(
        f"{prefix}piper_phonemize-1.2.0-{py}-{abi}-{platform}.whl"
        for py, abi in tags
        for platform in platforms
    )
def get_v1_3_0_files():
    """Return the sorted download URLs of the piper_phonemize 1.3.0 wheels."""
    prefix = (
        "https://github.com/csukuangfj/piper-phonemize/releases/download/2025.06.23/"
    )
    # python tag -> macOS version used by its universal2 and x86_64 wheels
    macos_by_tag = {
        "cp310": "10_9",
        "cp311": "10_9",
        "cp312": "10_13",
        "cp313": "10_13",
        "cp38": "10_9",
        "cp39": "10_9",
    }
    ans = []
    for tag, macos in macos_by_tag.items():
        platforms = [
            f"macosx_{macos}_universal2",
            f"macosx_{macos}_x86_64",
            "macosx_11_0_arm64",
            "manylinux_2_17_aarch64.manylinux2014_aarch64",
            "manylinux_2_17_i686.manylinux2014_i686",
            "manylinux_2_17_x86_64.manylinux2014_x86_64",
            "win_amd64",
        ]
        ans.extend(
            f"{prefix}piper_phonemize-1.3.0-{tag}-{tag}-{platform}.whl"
            for platform in platforms
        )
    ans.sort()
    return ans
def main():
    """Write piper_phonemize.html with one link per wheel, 1.3.0 wheels first."""
    urls = get_v1_3_0_files() + get_v1_2_0_files()
    links = []
    for url in urls:
        wheel_name = url.rsplit("/", 1)[-1]
        links.append(f'<a href="{url}">{wheel_name}</a><br/>\n')
    with open("piper_phonemize.html", "w") as out:
        out.writelines(links)


if __name__ == "__main__":
    main()

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
# Clone kaldifeat into ~/tmp/kaldifeat and build its _kaldifeat target there;
# the directory is cached by GitHub actions for later runs.
set -e
work_dir=~/tmp
mkdir -p "${work_dir}"
cd "${work_dir}"
git clone https://github.com/csukuangfj/kaldifeat
mkdir kaldifeat/build
cd kaldifeat/build
cmake -DCMAKE_BUILD_TYPE=Release ..
make -j2 _kaldifeat

View File

@ -1,132 +0,0 @@
#!/usr/bin/env bash
set -ex
log() {
  # This function is from espnet.
  # Prints "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  local caller_file="${BASH_SOURCE[1]##*/}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
}
cd egs/ksponspeech/ASR
# Download the non-streaming KsponSpeech zipformer model plus four Korean test
# wavs and their transcripts, run pretrained.py, export to onnx, run the
# exported models on one wav, and copy the test wavs, onnx models, tokens and
# bpe model to /tmp/model-2024-06-24. The clone is removed afterwards.
function test_pretrained_non_streaming() {
  local repo=icefall-asr-ksponspeech-zipformer-2024-06-24
  local wav_url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs
  local dst=/tmp/model-2024-06-24
  local f

  git lfs install
  git clone "https://huggingface.co/johnBamma/$repo"

  pushd "$repo"
  mkdir test_wavs
  cd test_wavs
  for f in 0.wav 1.wav 2.wav 3.wav trans.txt; do
    curl -SL -O "$wav_url/$f"
  done
  cd ../exp
  # export-onnx.py is run with --epoch 99, so alias the checkpoint.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd

  log 'test pretrained.py'
  ./zipformer/pretrained.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --method greedy_search \
    "$repo/test_wavs/0.wav" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/2.wav" \
    "$repo/test_wavs/3.wav"

  log 'test export-onnx.py'
  ./zipformer/export-onnx.py \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp/"

  ls -lh "$repo/exp"
  ls -lh "$repo/data/lang_bpe_5000/"

  log 'test exported onnx models'
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    "$repo/test_wavs/0.wav"

  mkdir -p "$dst"

  cp -av "$repo/test_wavs" "$dst"
  # The onnx models are copied once; the original repeated this command.
  cp -v "$repo"/exp/*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_5000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_5000/bpe.model" "$dst"
  rm -rf "$repo"
}
# Download the streaming KsponSpeech pruned-transducer-stateless7 model plus
# four Korean test wavs, run pretrained.py, export to onnx with
# --decode-chunk-len 32, run the exported models on one wav, and copy the
# onnx models, tokens and bpe model to /tmp/model-2024-06-16. The clone is
# removed afterwards.
function test_pretrained_streaming() {
  local repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
  local wav_url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs
  local dst=/tmp/model-2024-06-16
  local f

  git lfs install
  git clone "https://huggingface.co/johnBamma/$repo"

  pushd "$repo"
  mkdir test_wavs
  cd test_wavs
  for f in 0.wav 1.wav 2.wav 3.wav; do
    curl -SL -O "$wav_url/$f"
  done
  cd ../exp
  # export-onnx.py is run with --epoch 99, so alias the checkpoint.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd

  log 'test pretrained.py'
  ./pruned_transducer_stateless7_streaming/pretrained.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --method greedy_search \
    "$repo/test_wavs/0.wav" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/2.wav" \
    "$repo/test_wavs/3.wav"

  log 'test export-onnx.py'
  ./pruned_transducer_stateless7_streaming/export-onnx.py \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --decode-chunk-len 32 \
    --exp-dir "$repo/exp/"

  ls -lh "$repo/exp"
  ls -lh "$repo/data/lang_bpe_5000/"

  log 'test exported onnx models'
  ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    "$repo/test_wavs/0.wav"

  mkdir -p "$dst"

  # The onnx models are copied once; the original repeated this command.
  cp -v "$repo"/exp/*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_5000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_5000/bpe.model" "$dst"
  rm -rf "$repo"
}
# Run the non-streaming test first, then the streaming one.
for ci_test in test_pretrained_non_streaming test_pretrained_streaming; do
  "$ci_test"
done

File diff suppressed because it is too large Load Diff

View File

@ -1,275 +0,0 @@
#!/usr/bin/env bash
# Exports streaming zipformer checkpoints to ONNX, converts them to RKNN for
# the rk35xx target platforms listed in each function, and moves the packaged
# tarballs to /icefall/.
# Reads $rknn_toolkit2_version to decide which export functions run.
# -e aborts on the first failing command; -x traces every command.
set -ex
# Python packages installed up front for the recipe scripts invoked below.
python3 -m pip install kaldi-native-fbank soundfile librosa
log() {
  # Timestamped logger (adapted from espnet). The message is prefixed with the
  # basename of the calling file, the line of the call and the calling function.
  local src_file=${BASH_SOURCE[1]##*/}
  local call_line=${BASH_LINENO[0]}
  local caller=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so backslash escapes inside the message are still interpreted.
  echo -e "${stamp} (${src_file}:${call_line}:${caller}) $*"
}
# The recipe scripts below are invoked via paths relative to this directory.
cd egs/librispeech/ASR
# https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
# sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
function export_2023_02_20() {
  # Download csukuangfj/k2fsa-zipformer-chinese-english-mixed, export it to
  # ONNX, decode two test waves with the ONNX models, then convert to RKNN once
  # per target platform and move each packaged tarball to /icefall/.
  # As in the original layout, d, platform and dst are left as globals.
  local hf_base=https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main
  local idx wav

  d=exp_2023_02_20
  mkdir "$d"

  # Fetch checkpoint, tokens and the five test waves into $d.
  pushd "$d"
  curl -SL -O "$hf_base/exp/pretrained.pt"
  # Renamed to match the --epoch 99 --avg 1 arguments passed to the exporter.
  mv pretrained.pt epoch-99.pt
  curl -SL -O "$hf_base/data/lang_char_bpe/tokens.txt"
  for idx in 0 1 2 3 4; do
    curl -SL -O "$hf_base/test_wavs/$idx.wav"
  done
  ls -lh
  popd

  ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d/" \
    --decode-chunk-len 64 \
    --num-encoder-layers "2,4,3,2,4" \
    --feedforward-dims "1024,1024,1536,1536,1024" \
    --nhead "8,8,8,8,8" \
    --encoder-dims "384,384,384,384,384" \
    --attention-dims "192,192,192,192,192" \
    --encoder-unmasked-dims "256,256,256,256,256" \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512
  ls -lh "$d/"

  local encoder_onnx=$d/encoder-epoch-99-avg-1.onnx
  local decoder_onnx=$d/decoder-epoch-99-avg-1.onnx
  local joiner_onnx=$d/joiner-epoch-99-avg-1.onnx

  # Decode 0.wav and 1.wav with the exported ONNX models.
  for wav in 0.wav 1.wav; do
    ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
      --encoder-model-filename "$encoder_onnx" \
      --decoder-model-filename "$decoder_onnx" \
      --joiner-model-filename "$joiner_onnx" \
      --tokens "$d/tokens.txt" \
      "$d/$wav"
  done

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
    mkdir -p "$dst"

    # stderr of the converter is discarded here, as in the original script.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder "$encoder_onnx" \
      --in-decoder "$decoder_onnx" \
      --in-joiner "$joiner_onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform" 2>/dev/null
    ls -lh "$dst/"

    ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
      --encoder "$encoder_onnx" \
      --decoder "$decoder_onnx" \
      --joiner "$joiner_onnx" \
      --tokens "$d/tokens.txt" \
      --wav "$d/0.wav"

    # Assemble the package: models + tokens + test waves, then tar it up.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"
    rm -rf "$dst"
  done
}
# https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
# sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
function export_2023_02_16() {
  # Download csukuangfj/k2fsa-zipformer-bilingual-zh-en-t, export it to ONNX,
  # decode two test waves with the ONNX models, then convert to RKNN once per
  # target platform and move each packaged tarball to /icefall/.
  # As in the original layout, d, platform and dst are left as globals.
  local hf_base=https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main
  local idx wav

  d=exp_2023_02_16
  mkdir "$d"

  # Fetch checkpoint, tokens and the five test waves into $d.
  pushd "$d"
  curl -SL -O "$hf_base/exp/pretrained.pt"
  # Renamed to match the --epoch 99 --avg 1 arguments passed to the exporter.
  mv pretrained.pt epoch-99.pt
  curl -SL -O "$hf_base/data/lang_char_bpe/tokens.txt"
  for idx in 0 1 2 3 4; do
    curl -SL -O "$hf_base/test_wavs/$idx.wav"
  done
  ls -lh
  popd

  ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d/" \
    --decode-chunk-len 64 \
    --num-encoder-layers 2,2,2,2,2 \
    --feedforward-dims 768,768,768,768,768 \
    --nhead 4,4,4,4,4 \
    --encoder-dims 256,256,256,256,256 \
    --attention-dims 192,192,192,192,192 \
    --encoder-unmasked-dims 192,192,192,192,192 \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512
  ls -lh "$d/"

  local encoder_onnx=$d/encoder-epoch-99-avg-1.onnx
  local decoder_onnx=$d/decoder-epoch-99-avg-1.onnx
  local joiner_onnx=$d/joiner-epoch-99-avg-1.onnx

  # Decode 0.wav and 1.wav with the exported ONNX models.
  for wav in 0.wav 1.wav; do
    ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
      --encoder-model-filename "$encoder_onnx" \
      --decoder-model-filename "$decoder_onnx" \
      --joiner-model-filename "$joiner_onnx" \
      --tokens "$d/tokens.txt" \
      "$d/$wav"
  done

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
    mkdir -p "$dst"

    # stderr of the converter is discarded here, as in the original script.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder "$encoder_onnx" \
      --in-decoder "$decoder_onnx" \
      --in-joiner "$joiner_onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform" 2>/dev/null
    ls -lh "$dst/"

    ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
      --encoder "$encoder_onnx" \
      --decoder "$decoder_onnx" \
      --joiner "$joiner_onnx" \
      --tokens "$d/tokens.txt" \
      --wav "$d/0.wav"

    # Assemble the package: models + tokens + test waves, then tar it up.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"
    rm -rf "$dst"
  done
}
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
function export_2023_06_26() {
  # Download Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17,
  # export the streaming transducer to ONNX, then convert it to RKNN once per
  # target platform and move each packaged tarball to /icefall/.
  # As in the original layout, d, platform and dst are left as globals.
  local hf_base=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main

  d=exp_2023_06_26
  mkdir "$d"

  pushd "$d"
  curl -SL -O "$hf_base/exp/pretrained.pt"
  # Renamed to match the --epoch 99 --avg 1 arguments passed to the exporter.
  mv pretrained.pt epoch-99.pt
  curl -SL -O "$hf_base/data/lang_bpe_500/tokens.txt"

  # Every *.wav in $d is copied into the package's test_wavs/, so each of them
  # must be real audio: 0.wav comes from test_wavs/, not from tokens.txt.
  curl -SL -o 0.wav "$hf_base/test_wavs/1089-134686-0001.wav"
  curl -SL -o 1.wav "$hf_base/test_wavs/1221-135766-0001.wav"
  curl -SL -o 2.wav "$hf_base/test_wavs/1221-135766-0002.wav"
  ls -lh
  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1
  ls -lh "$d/"

  # File-name stem shared by the three exported ONNX models.
  local stem=epoch-99-avg-1-chunk-32-left-128

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-en-2023-06-26
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-$stem.onnx" \
      --in-decoder "$d/decoder-$stem.onnx" \
      --in-joiner "$d/joiner-$stem.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"
    ls -lh "$dst/"

    # Assemble the package: models + tokens + test waves, then tar it up.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"
    rm -rf "$dst"
  done
}
# Toolkit 2.1.0 handles the two bilingual zh-en exports; every other value of
# $rknn_toolkit2_version (including unset) runs the English 2023-06-26 export.
case "$rknn_toolkit2_version" in
  "2.1.0")
    export_2023_02_16
    export_2023_02_20
    ;;
  *)
    export_2023_06_26
    ;;
esac

View File

@ -1,157 +0,0 @@
#!/usr/bin/env bash
# CI script for the LJSpeech matcha TTS recipe: prepares a small test subset,
# trains briefly, runs inference and exports/tests ONNX models.
# -e aborts on the first failing command; -x traces every command.
set -ex
# sox is installed for the soxi calls further down.
apt-get update
apt-get install -y sox
# Python dependencies; piper_phonemize wheels come from the k2-fsa index page.
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
python3 -m pip install espnet_tts_frontend
python3 -m pip install numba conformer==0.3.2 diffusers librosa
log() {
  # Timestamped logger (adapted from espnet). The message is prefixed with the
  # basename of the calling file, the line of the call and the calling function.
  local src_file=${BASH_SOURCE[1]##*/}
  local call_line=${BASH_LINENO[0]}
  local caller=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so backslash escapes inside the message are still interpreted.
  echo -e "${stamp} (${src_file}:${call_line}:${caller}) $*"
}
# The recipe scripts below are invoked via paths relative to this directory.
cd egs/ljspeech/TTS
# Patch prepare.sh in place (a .bak copy is kept): every 600 becomes 8,
# "first 100" becomes "first 3" and every 500 becomes 5 -- presumably to shrink
# the workload for this test run (confirm against prepare.sh).
sed -i.bak s/600/8/g ./prepare.sh
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
sed -i.bak s/500/5/g ./prepare.sh
# Show the resulting modifications in the job output.
git diff
function prepare_data() {
  # Fetch the reduced LJSpeech archive created for testing, unpack it under
  # download/, then run the recipe's prepare.sh and list the resulting tree.
  local archive=LJSpeech-1.1.tar.bz2
  local subset_url=https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main

  mkdir -p download
  pushd download
  wget -q "$subset_url/$archive"
  tar xvf "$archive"
  popd

  ./prepare.sh
  tree .
}
function train() {
  # Patch matcha/train.py in place (1500 -> 3, .bak kept), show the diff, then
  # run a single training epoch into matcha/exp.
  pushd ./matcha
  sed -i.bak s/1500/3/g ./train.py
  git diff .
  popd

  local train_args=(
    --exp-dir matcha/exp
    --num-epochs 1
    --save-every-n 1
    --num-buckets 2
    --tokens data/tokens.txt
    --max-duration 20
  )
  ./matcha/train.py "${train_args[@]}"

  ls -lh matcha/exp
}
function infer() {
  # Download the hifigan generator_v1 vocoder, synthesize one sentence with the
  # epoch-1 checkpoint, inspect the wave with soxi, then remove both files.
  local vocoder=generator_v1
  local out_wav=./generated.wav

  curl -SL -O "https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/$vocoder"

  local infer_args=(
    --num-buckets 2
    --epoch 1
    --exp-dir ./matcha/exp
    --tokens data/tokens.txt
    --vocoder "./$vocoder"
    --input-text "how are you doing?"
    --output-wav "$out_wav"
  )
  ./matcha/infer.py "${infer_args[@]}"

  ls -lh *.wav
  soxi "$out_wav"
  rm -v "$out_wav"
  rm -v "$vocoder"
}
function export_onnx() {
  # Export the pre-trained epoch-4000 matcha checkpoint to ONNX, test it with
  # three hifigan vocoders, and publish the models plus a packaged
  # matcha-icefall-en_US-ljspeech directory/tarball under /icefall.
  # As in the original layout, v and d are left as globals.
  local hf_model=https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main
  local hifigan_src=https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan
  local ver

  pushd matcha/exp
  curl -SL -O "$hf_model/exp/epoch-4000.pt"
  popd

  # Replace any cmvn statistics in data/fbank with those of the released model.
  pushd data/fbank
  rm -fv *.json
  curl -SL -O "$hf_model/data/cmvn.json"
  popd

  ./matcha/export_onnx.py \
    --exp-dir ./matcha/exp \
    --epoch 4000 \
    --tokens ./data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json
  ls -lh *.onnx

  if false; then
    # The CI machine does not have enough memory to run it
    #
    for ver in v1 v2 v3; do
      curl -SL -O "$hifigan_src/generator_$ver"
    done
    python3 ./matcha/export_onnx_hifigan.py
  else
    # Use the already-exported vocoders instead.
    for ver in v1 v2 v3; do
      curl -SL -O "$hf_model/exp/hifigan_$ver.onnx"
    done
  fi
  ls -lh *.onnx

  for v in v1 v2 v3; do
    python3 ./matcha/onnx_pretrained.py \
      --acoustic-model ./model-steps-6.onnx \
      --vocoder "./hifigan_$v.onnx" \
      --tokens ./data/tokens.txt \
      --input-text "how are you doing?" \
      --output-wav "/icefall/generated-matcha-tts-steps-6-$v.wav"
  done
  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
  cp ./model-steps-*.onnx /icefall

  # Build the release package: tokens + 3-step model + espeak-ng data + README.
  d=matcha-icefall-en_US-ljspeech
  mkdir "$d"
  cp -v data/tokens.txt "$d"
  cp model-steps-3.onnx "$d"

  pushd "$d"
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
  cat >README.md <<EOF
# Introduction
This model is trained using the dataset from
https://keithito.com/LJ-Speech-Dataset/
The dataset contains only 1 female speaker.
You can find the training code at
https://github.com/k2-fsa/icefall/tree/master/egs/ljspeech/TTS#matcha
EOF
  ls -lh
  popd

  tar cvjf "$d.tar.bz2" "$d"
  mv "$d.tar.bz2" /icefall
  mv "$d" /icefall
}
# Run the pipeline stages in order.
prepare_data
train
infer
export_onnx
# Remove leftover vocoder files and the experiment directory.
rm -rfv generator_v* matcha/exp
# Discard local modifications to tracked files (e.g. the sed patches above).
git checkout .

View File

@ -1,157 +0,0 @@
#!/usr/bin/env bash
# CI script for the LJSpeech VITS TTS recipe: prepares a small test subset,
# trains/infers/exports the low, medium and high model types, then packages
# the released medium and low models under /icefall.
# -e aborts on the first failing command; -x traces every command.
set -ex
# Python dependencies; piper_phonemize wheels come from the k2-fsa index page.
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
python3 -m pip install espnet_tts_frontend
python3 -m pip install numba
log() {
  # Timestamped logger (adapted from espnet). The message is prefixed with the
  # basename of the calling file, the line of the call and the calling function.
  local src_file=${BASH_SOURCE[1]##*/}
  local call_line=${BASH_LINENO[0]}
  local caller=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so backslash escapes inside the message are still interpreted.
  echo -e "${stamp} (${src_file}:${call_line}:${caller}) $*"
}
# The recipe scripts below are invoked via paths relative to this directory.
cd egs/ljspeech/TTS
# Patch prepare.sh in place (a .bak copy is kept): every 600 becomes 8,
# "first 100" becomes "first 3" and every 500 becomes 5 -- presumably to shrink
# the workload for this test run (confirm against prepare.sh).
sed -i.bak s/600/8/g ./prepare.sh
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
sed -i.bak s/500/5/g ./prepare.sh
# Show the resulting modifications in the job output.
git diff
function prepare_data() {
  # Fetch the reduced LJSpeech archive created for testing, unpack it under
  # download/, then run the recipe's prepare.sh and list the resulting tree.
  local archive=LJSpeech-1.1.tar.bz2
  local subset_url=https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main

  mkdir -p download
  pushd download
  wget -q "$subset_url/$archive"
  tar xvf "$archive"
  popd

  ./prepare.sh
  tree .
}
function train() {
  # Patch vits/train.py in place (200 -> 3, .bak kept), show the diff, then
  # train each model type for one epoch into its own vits/exp-<type> directory.
  # As in the original layout, the loop variable t is left as a global.
  pushd ./vits
  sed -i.bak s/200/3/g ./train.py
  git diff .
  popd

  for t in low medium high; do
    local train_args=(
      --exp-dir "vits/exp-$t"
      --model-type "$t"
      --num-epochs 1
      --save-every-n 1
      --num-buckets 2
      --tokens data/tokens.txt
      --max-duration 20
    )
    ./vits/train.py "${train_args[@]}"
    ls -lh "vits/exp-$t"
  done
}
function infer() {
  # Run inference with the epoch-1 checkpoint of each model type.
  # As in the original layout, the loop variable t is left as a global.
  for t in low medium high; do
    local infer_args=(
      --num-buckets 2
      --model-type "$t"
      --epoch 1
      --exp-dir "./vits/exp-$t"
      --tokens data/tokens.txt
      --max-duration 20
    )
    ./vits/infer.py "${infer_args[@]}"
  done
}
function export_onnx() {
  # Export the epoch-1 checkpoint of each model type to ONNX and list the
  # contents of its experiment directory.
  # As in the original layout, the loop variable t is left as a global.
  for t in low medium high; do
    local exp_dir=vits/exp-$t
    ./vits/export-onnx.py \
      --model-type "$t" \
      --epoch 1 \
      --exp-dir "./$exp_dir" \
      --tokens data/tokens.txt
    ls -lh "$exp_dir/"
  done
}
function test_medium() {
  # Clone the released medium VITS model, export it to ONNX, synthesize a test
  # wave, then package model + tokens + espeak-ng data as a tarball in /icefall.
  # As in the original layout, d is left as a global.
  local hf_repo=icefall-tts-ljspeech-vits-medium-2024-03-12
  local pkg=vits-icefall-en_US-ljspeech-medium

  git clone "https://huggingface.co/csukuangfj/$hf_repo"

  ./vits/export-onnx.py \
    --model-type medium \
    --epoch 820 \
    --exp-dir "./$hf_repo/exp" \
    --tokens "./$hf_repo/data/tokens.txt"
  ls -lh "./$hf_repo/exp"

  ./vits/test_onnx.py \
    --model-filename "./$hf_repo/exp/vits-epoch-820.onnx" \
    --tokens "./$hf_repo/data/tokens.txt" \
    --output-filename /icefall/test-medium.wav
  ls -lh /icefall/test-medium.wav

  d=/icefall/$pkg
  mkdir "$d"
  cp -v "./$hf_repo/data/tokens.txt" "$d/"
  cp -v "./$hf_repo/exp/vits-epoch-820.onnx" "$d/model.onnx"
  rm -rf "$hf_repo"

  pushd "$d"
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
  # Step up to /icefall so the archive holds the package directory itself.
  cd ..
  tar cjf "$pkg.tar.bz2" "$pkg"
  rm -rf "$pkg"
  ls -lh *.tar.bz2
  popd
}
function test_low() {
  # Clone the released low VITS model, export it to ONNX, synthesize a test
  # wave, then package model + tokens + espeak-ng data as a tarball in /icefall.
  # As in the original layout, d is left as a global.
  local hf_repo=icefall-tts-ljspeech-vits-low-2024-03-12
  local pkg=vits-icefall-en_US-ljspeech-low

  git clone "https://huggingface.co/csukuangfj/$hf_repo"

  ./vits/export-onnx.py \
    --model-type low \
    --epoch 1600 \
    --exp-dir "./$hf_repo/exp" \
    --tokens "./$hf_repo/data/tokens.txt"
  ls -lh "./$hf_repo/exp"

  ./vits/test_onnx.py \
    --model-filename "./$hf_repo/exp/vits-epoch-1600.onnx" \
    --tokens "./$hf_repo/data/tokens.txt" \
    --output-filename /icefall/test-low.wav
  ls -lh /icefall/test-low.wav

  d=/icefall/$pkg
  mkdir "$d"
  cp -v "./$hf_repo/data/tokens.txt" "$d/"
  cp -v "./$hf_repo/exp/vits-epoch-1600.onnx" "$d/model.onnx"
  rm -rf "$hf_repo"

  pushd "$d"
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
  # Step up to /icefall so the archive holds the package directory itself.
  cd ..
  tar cjf "$pkg.tar.bz2" "$pkg"
  rm -rf "$pkg"
  ls -lh *.tar.bz2
  popd
}
# Run the pipeline stages on the freshly trained models.
prepare_data
train
infer
export_onnx
# Remove the three experiment directories before testing the released models.
rm -rf vits/exp-{low,medium,high}
# Export, test and package the released medium and low models.
test_medium
test_low

View File

@ -1,756 +0,0 @@
#!/usr/bin/env bash
# CI script for the multi_zh-hans ASR recipe: exports pre-trained zipformer
# models to ONNX, tests them, and uploads the results to Hugging Face using
# $HF_TOKEN.
# -e aborts on the first failing command; -x traces every command.
set -ex
# Identity used for the "upload model" commits made by the functions below.
git config --global user.name "k2-fsa"
git config --global user.email "csukuangfj@gmail.com"
git config --global lfs.allowincompletepush true
# Pinned ONNX tooling plus sherpa-onnx, whose CLI is used for decoding tests.
python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx
log() {
  # Timestamped logger (adapted from espnet). The message is prefixed with the
  # basename of the calling file, the line of the call and the calling function.
  local src_file=${BASH_SOURCE[1]##*/}
  local call_line=${BASH_LINENO[0]}
  local caller=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so backslash escapes inside the message are still interpreted.
  echo -e "${stamp} (${src_file}:${call_line}:${caller}) $*"
}
# The recipe scripts below are invoked via paths relative to this directory.
cd egs/multi_zh-hans/ASR
# Record the working directory in the job output.
log "pwd: $PWD"
function run_2023_9_2() {
  # Clone zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2, export the
  # non-streaming transducer to ONNX (including fp16), decode six test waves
  # with the fp32, int8 and fp16 models, then delete the clone.
  # As in the original layout, repo_url and repo are left as globals.
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  # Pull only the LFS objects that are needed.
  pushd "$repo"
  cd exp
  git lfs pull --include pretrained.pt
  ln -s pretrained.pt epoch-99.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  log "--------------------------------------------"
  log "Export non-streaming ONNX transducer models "
  log "--------------------------------------------"

  local tokens=$repo/data/lang_bpe_2000/tokens.txt
  local stem=epoch-99-avg-1
  local wavs=(
    "$repo/test_wavs/DEV_T0000000000.wav"
    "$repo/test_wavs/DEV_T0000000001.wav"
    "$repo/test_wavs/DEV_T0000000002.wav"
    "$repo/test_wavs/TEST_MEETING_T0000000113.wav"
    "$repo/test_wavs/TEST_MEETING_T0000000219.wav"
    "$repo/test_wavs/TEST_MEETING_T0000000351.wav"
  )

  ./zipformer/export-onnx.py \
    --tokens "$tokens" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp" \
    --causal False \
    --fp16 1
  ls -lh "$repo/exp"

  # fp32 models.
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-$stem.onnx" \
    --decoder-model-filename "$repo/exp/decoder-$stem.onnx" \
    --joiner-model-filename "$repo/exp/joiner-$stem.onnx" \
    --tokens "$tokens" \
    "${wavs[@]}"

  # int8 encoder and joiner; the decoder stays fp32.
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-$stem.int8.onnx" \
    --decoder-model-filename "$repo/exp/decoder-$stem.onnx" \
    --joiner-model-filename "$repo/exp/joiner-$stem.int8.onnx" \
    --tokens "$tokens" \
    "${wavs[@]}"

  # fp16 models.
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-$stem.fp16.onnx" \
    --decoder-model-filename "$repo/exp/decoder-$stem.fp16.onnx" \
    --joiner-model-filename "$repo/exp/joiner-$stem.fp16.onnx" \
    --tokens "$tokens" \
    "${wavs[@]}"

  rm -rf "$repo"
}
function run_2023_11_05_streaming() {
  # Clone zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05,
  # then for both the CTC head and the transducer: export streaming ONNX models
  # (fp32/int8/fp16), test them, push each variant to its Hugging Face repo and
  # leave a tarball three directories up.
  # As in the original layout, repo_url, repo, test_wavs, name, w, n, url and
  # dst are left as globals; the clone in $repo is not removed.
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  # Pull only the LFS objects that are needed and drop stale exports.
  pushd "$repo"
  cd exp/
  git lfs pull --include pretrained.pt
  rm -fv epoch-20.pt
  rm -fv *.onnx
  ln -s pretrained.pt epoch-20.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  local lang_dir=$repo/data/lang_bpe_2000
  local suffix=epoch-20-avg-1-chunk-16-left-128
  local ctc_stem=$repo/exp/ctc-$suffix
  local precision model

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir "$repo/exp" \
    --tokens "$lang_dir/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    --fp16 1
  ls -lh "$repo/exp/"

  log "------------------------------------------------------------"
  log "Test exported streaming ONNX CTC models (greedy search) "
  log "------------------------------------------------------------"
  test_wavs=(
    DEV_T0000000000.wav
    DEV_T0000000001.wav
    DEV_T0000000002.wav
    TEST_MEETING_T0000000113.wav
    TEST_MEETING_T0000000219.wav
    TEST_MEETING_T0000000351.wav
  )
  for w in "${test_wavs[@]}"; do
    for precision in fp32 int8 fp16; do
      log "----${precision}----"
      # The fp32 model carries no precision infix in its file name.
      if [[ $precision == fp32 ]]; then
        model=$ctc_stem.onnx
      else
        model=$ctc_stem.$precision.onnx
      fi
      ./zipformer/onnx_pretrained-streaming-ctc.py \
        --model-filename "$model" \
        --tokens "$lang_dir/tokens.txt" \
        "$repo/test_wavs/$w"
    done
  done

  log "Upload onnx CTC models to huggingface"
  name=(
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/k2-fsa/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")

    case "$n" in
      sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13)
        cp -v "$ctc_stem.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13)
        cp -v "$ctc_stem.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13)
        cp -v "$ctc_stem.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$lang_dir/tokens.txt" "$dst"
    cp -v "$lang_dir/bpe.model" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

    cd "$dst"
    git lfs track "*.onnx" "bpe.model" "*.wav"
    ls -lh
    file bpe.model
    git status
    git add .
    # Best effort: a failed commit or push must not abort the script.
    git commit -m "upload model" && git push "https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst" main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    # Strip git metadata so the tarball contains only the model files.
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming.py \
    --exp-dir "$repo/exp" \
    --tokens "$lang_dir/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    --fp16 1
  ls -lh "$repo/exp"

  log "------------------------------------------------------------"
  log "Test exported streaming ONNX transducer models (Python code)"
  log "------------------------------------------------------------"
  local enc=$repo/exp/encoder-$suffix
  local dec=$repo/exp/decoder-$suffix
  local joi=$repo/exp/joiner-$suffix

  log "test fp32"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$enc.onnx" \
    --decoder-model-filename "$dec.onnx" \
    --joiner-model-filename "$joi.onnx" \
    --tokens "$lang_dir/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"

  log "test int8"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$enc.int8.onnx" \
    --decoder-model-filename "$dec.onnx" \
    --joiner-model-filename "$joi.int8.onnx" \
    --tokens "$lang_dir/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"

  log "test fp16"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$enc.fp16.onnx" \
    --decoder-model-filename "$dec.fp16.onnx" \
    --joiner-model-filename "$joi.fp16.onnx" \
    --tokens "$lang_dir/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"

  name=(
    sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")

    case "$n" in
      sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13)
        cp -v "$enc.onnx" "$dst"
        cp -v "$dec.onnx" "$dst"
        cp -v "$joi.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13)
        cp -v "$enc.int8.onnx" "$dst"
        cp -v "$dec.onnx" "$dst"
        cp -v "$joi.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13)
        cp -v "$enc.fp16.onnx" "$dst"
        cp -v "$dec.fp16.onnx" "$dst"
        cp -v "$joi.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$lang_dir/tokens.txt" "$dst"
    cp -v "$lang_dir/bpe.model" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

    cd "$dst"
    git lfs track "*.onnx" "bpe.model" "*.wav"
    ls -lh
    file bpe.model
    git status
    git add .
    # Best effort: a failed commit or push must not abort the script.
    git commit -m "upload model" && git push "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst" main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    # Strip git metadata so the tarball contains only the model files.
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done
}
function run_2023_12_12_streaming() {
  # Copy all exported encoder/decoder/joiner ONNX models, tokens, bpe.model and
  # test waves from $repo into the k2-fsa 2023-12-12 Hugging Face repo, push
  # them, and leave a tarball three directories up.
  # NOTE: $repo is not set here; it must already have been set by a function
  # that ran earlier. url and dst are left as globals, as in the original.
  log "Upload onnx transducer models to huggingface"

  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
  GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
  dst=$(basename "$url")

  local part
  for part in encoder decoder joiner; do
    cp -v "$repo"/exp/"$part"*.onnx "$dst"
  done
  cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
  mkdir -p "$dst/test_wavs"
  cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

  cd "$dst"
  git lfs track "*.onnx" bpe.model "*.wav"
  git add .
  # Best effort: a failed commit or push must not abort the script.
  git commit -m "upload model" && git push "https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst" main || true

  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
  # Strip git metadata so the tarball contains only the model files.
  rm -rf .git
  rm -fv .gitattributes
  cd ..
  tar cjfv "$dst.tar.bz2" "$dst"
  ls -lh *.tar.bz2
  mv -v "$dst.tar.bz2" ../../../
}
function run_yuekai_large() {
  # Clone yuekai/icefall-asr-multi-zh-hans-zipformer-large (authenticated with
  # $HF_TOKEN), then for both the CTC head and the transducer: export streaming
  # ONNX models (fp32/int8/fp16), decode three test waves with sherpa-onnx,
  # push each variant to its Hugging Face repo and leave a tarball three
  # directories up.
  # As in the original layout, repo_url, repo, name, w, n, m, url and dst are
  # left as globals.
  # NOTE(review): $repo_url embeds $HF_TOKEN and is passed to log below.
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  pushd "$repo"
  git lfs pull --include pretrained.pt
  mv pretrained.pt epoch-99.pt
  curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
  popd

  # Architecture options shared by both export commands.
  local arch_args=(
    --num-encoder-layers 2,2,4,5,4,2
    --feedforward-dim 768,1024,1536,2048,1536,768
    --encoder-dim 256,384,512,768,512,256
    --encoder-unmasked-dim 192,192,256,320,256,192
  )
  local wav_src=https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs
  local suffix=epoch-99-avg-1-chunk-16-left-128
  local precision model wav

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir "$repo/" \
    --tokens "$repo/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    "${arch_args[@]}" \
    --fp16 1 \
    --use-whisper-features 1
  ls -lh "$repo/"

  pushd "$repo"
  cat >README.md <<EOF
# Introduction
This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-large-model
EOF
  # Give the exported CTC models short, precision-tagged names.
  mv -v "ctc-$suffix.fp16.onnx" model.fp16.onnx
  mv -v "ctc-$suffix.int8.onnx" model.int8.onnx
  mv -v "ctc-$suffix.onnx" model.onnx
  ls -lh *.onnx

  mkdir test_wavs
  cd test_wavs
  for wav in 0.wav 1.wav 8k.wav; do
    curl -SL -O "$wav_src/$wav"
  done
  # popd undoes the pushd above, returning to the recipe directory.
  popd

  for w in 0.wav 1.wav 8k.wav; do
    for precision in fp32 int8 fp16; do
      log "---${precision}---"
      # The fp32 model carries no precision infix in its file name.
      if [[ $precision == fp32 ]]; then
        model=$repo/model.onnx
      else
        model=$repo/model.$precision.onnx
      fi
      sherpa-onnx \
        --zipformer2-ctc-model="$model" \
        --tokens="$repo/tokens.txt" \
        "$repo/test_wavs/$w"
    done
  done

  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")

    case "$n" in
      sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30)
        cp -v "$repo/model.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30)
        cp -v "$repo/model.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30)
        cp -v "$repo/model.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$repo/tokens.txt" "$dst"
    cp -v "$repo/README.md" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

    cd "$dst"
    git lfs track "*.onnx" "*.wav"
    ls -lh
    git status
    git add .
    # Best effort: a failed commit or push must not abort the script.
    git commit -m "upload model" && git push "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst" main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    # Strip git metadata so the tarball contains only the model files.
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done

  # Clear the CTC exports before the transducer export writes into $repo.
  rm "$repo"/*.onnx

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming.py \
    --exp-dir "$repo" \
    --tokens "$repo/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    "${arch_args[@]}" \
    --fp16 1 \
    --use-whisper-features 1
  ls -lh "$repo"

  # Give the exported transducer models short, precision-tagged names.
  pushd "$repo"
  for m in encoder decoder joiner; do
    mv -v "$m-$suffix.onnx" "$m.onnx"
    mv -v "$m-$suffix.fp16.onnx" "$m.fp16.onnx"
    mv -v "$m-$suffix.int8.onnx" "$m.int8.onnx"
  done
  ls -lh *.onnx
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
    sherpa-onnx \
      --encoder="$repo/encoder.onnx" \
      --decoder="$repo/decoder.onnx" \
      --joiner="$repo/joiner.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"

    # int8 encoder and joiner; the decoder stays fp32.
    log "---int8---"
    sherpa-onnx \
      --encoder="$repo/encoder.int8.onnx" \
      --decoder="$repo/decoder.onnx" \
      --joiner="$repo/joiner.int8.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"

    log "---fp16---"
    sherpa-onnx \
      --encoder="$repo/encoder.fp16.onnx" \
      --decoder="$repo/decoder.fp16.onnx" \
      --joiner="$repo/joiner.fp16.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"
  done

  name=(
    sherpa-onnx-streaming-zipformer-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")

    case "$n" in
      sherpa-onnx-streaming-zipformer-zh-2025-06-30)
        cp -v "$repo/encoder.onnx" "$dst"
        cp -v "$repo/decoder.onnx" "$dst"
        cp -v "$repo/joiner.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30)
        cp -v "$repo/encoder.int8.onnx" "$dst"
        cp -v "$repo/decoder.onnx" "$dst"
        cp -v "$repo/joiner.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30)
        cp -v "$repo/encoder.fp16.onnx" "$dst"
        cp -v "$repo/decoder.fp16.onnx" "$dst"
        cp -v "$repo/joiner.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$repo/tokens.txt" "$dst"
    cp -v "$repo/README.md" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

    cd "$dst"
    git lfs track "*.onnx" "*.wav"
    ls -lh
    git status
    git add .
    # Best effort: a failed commit or push must not abort the script.
    git commit -m "upload model" && git push "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst" main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    # Strip git metadata so the tarball contains only the model files.
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done
}
# Package one exported model and publish it.
#
# Clones https://huggingface.co/csukuangfj/<name>, copies the given model
# files together with tokens.txt, bpe.model, README.md and the test wavs into
# the clone, commits and pushes the result (a failed commit/push is ignored),
# then removes the git metadata, creates <name>.tar.bz2 and moves it three
# directories up.
#
# Globals:
#   repo     (read) directory of the downloaded pre-trained model
#   HF_TOKEN (read) token used for the push
# Arguments:
#   $1   name of the target repository, also used as directory/tarball name
#   $2.. model files to copy into the package
function _yuekai_xl_publish() {
  local dst=$1
  shift

  GIT_LFS_SKIP_SMUDGE=1 git clone "https://huggingface.co/csukuangfj/$dst"

  cp -v "$@" "$dst"
  cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
  cp -v "$repo/README.md" "$dst"
  mkdir -p "$dst/test_wavs"
  cp -v ./test_wavs/*.wav "$dst/test_wavs"

  cd "$dst"
  git lfs track "*.onnx" "*.wav" "bpe.model"
  ls -lh
  git status
  git add .
  # Best effort: a failing commit or push must not abort the whole job.
  git commit -m "upload model" && git push "https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst" main || true

  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
  # Drop the git metadata so it does not end up in the tarball.
  rm -rf .git
  rm -fv .gitattributes
  cd ..

  ls -lh "$dst"
  tar cjfv "$dst.tar.bz2" "$dst"
  ls -lh *.tar.bz2
  mv -v "$dst.tar.bz2" ../../../
}

# Export the streaming multi-zh-hans zipformer-xl model to ONNX (CTC first,
# then transducer), run sherpa-onnx on the test wavs with the CTC models and
# publish int8/fp16 packages for both model types.
#
# Globals:
#   HF_TOKEN (read)            token used to clone the model and push packages
#   repo_url, repo (written)   clone URL and directory of the source model
function run_yuekai_xl() {
  local hf_path=huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
  repo_url=https://csukuangfj:${HF_TOKEN}@${hf_path}
  # Log the URL without credentials; logging $repo_url itself would write
  # HF_TOKEN into the job log.
  log "Downloading pre-trained model from https://${hf_path}"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  pushd "$repo"
  git lfs pull --include pretrained.pt
  git lfs pull --include data/lang_bpe_2000/bpe.model
  mv pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  # Options shared by both export scripts. They are split around --use-ctc so
  # that the final argument order is exactly the same as before.
  local -a export_head=(
    --exp-dir "$repo/"
    --tokens "$repo/data/lang_bpe_2000/tokens.txt"
    --causal 1
    --avg 1
    --epoch 99
    --use-averaged-model 0
    --chunk-size 16
    --left-context-frames 128
  )
  local -a export_tail=(
    --num-encoder-layers 2,3,5,6,5,3
    --feedforward-dim 1536,2048,3072,4096,3072,1536
    --encoder-dim 512,768,1024,1536,1024,512
    --encoder-unmasked-dim 192,192,256,320,256,192
    --decoder-dim 768 --joiner-dim 768
    --value-head-dim 18
    --query-head-dim 48
    --num-heads 4,4,4,8,4,4
    --fp16 1
    --use-whisper-features 1
    --use-external-data 1
  )

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    "${export_head[@]}" \
    --use-ctc 1 \
    "${export_tail[@]}"

  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
  ls -lh *.onnx

  # -p: tolerate a test_wavs directory left over from an earlier run.
  mkdir -p test_wavs
  pushd test_wavs
  local wav_url=https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs
  local w
  for w in 0.wav 1.wav 8k.wav; do
    curl -SL -O "$wav_url/$w"
  done
  popd

  local precision
  for w in 0.wav 1.wav 8k.wav; do
    for precision in int8 fp16; do
      log "---${precision}---"
      sherpa-onnx \
        --zipformer2-ctc-model="./model.${precision}.onnx" \
        --tokens="$repo/data/lang_bpe_2000/tokens.txt" \
        "test_wavs/$w"
    done
  done

  pushd "$repo"
  cat >README.md <<EOF
# Introduction
This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-xl-model
EOF
  popd

  _yuekai_xl_publish sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30 \
    model.int8.onnx
  _yuekai_xl_publish sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30 \
    model.fp16.onnx
  rm -fv *.onnx *.weights

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming.py \
    "${export_head[@]}" \
    --use-ctc 0 \
    "${export_tail[@]}"
  ls -lh *.onnx
  ls -lh *.weights

  # The encoder files are picked up from the current directory, the decoder
  # and joiner files from $repo (same locations as before).
  mv encoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx encoder.fp16.onnx
  mv encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx encoder.int8.onnx
  mv "$repo/decoder-epoch-99-avg-1-chunk-16-left-128.onnx" decoder.onnx
  mv "$repo/decoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx" decoder.fp16.onnx
  mv "$repo/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx" joiner.int8.onnx
  mv "$repo/joiner-epoch-99-avg-1-chunk-16-left-128.fp16.onnx" joiner.fp16.onnx

  # The int8 package ships the unquantized decoder next to the int8
  # encoder/joiner, as before.
  _yuekai_xl_publish sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30 \
    encoder.int8.onnx decoder.onnx joiner.int8.onnx
  _yuekai_xl_publish sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30 \
    encoder.fp16.onnx decoder.fp16.onnx joiner.fp16.onnx
  rm -fv *.onnx *.weights
}
# Entry point: only the call that is not commented out below is executed.
# run_yuekai_large
# run_yuekai_xl
# run_2023_9_2
run_2023_11_05_streaming
# run_2023_12_12_streaming

View File

@ -1,73 +0,0 @@
#!/usr/bin/env bash
# -e: stop at the first failing command; -x: trace each command as it runs.
set -ex
# Python packages installed before anything else runs
# (presumably required by the export scripts called below - TODO confirm).
python3 -m pip install kaldi-native-fbank soundfile librosa
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
# Relative paths used further down (./zipformer/..., exp) are resolved from here.
cd egs/multi_zh-hans/ASR
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese
# Download the 2023-11-05 streaming multi-zh-hans checkpoint, export the
# transducer to ONNX and convert it to RKNN once per target platform listed
# below. Each package is archived as <name>.tar.bz2 and moved to /icefall/.
function export_2023_11_05() {
  d=exp
  mkdir "$d"
  pushd "$d"

  local hf_base=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main
  curl -SL -O "$hf_base/data/lang_bpe_2000/tokens.txt"
  curl -SL -O "$hf_base/exp/pretrained.pt"
  # The export script is invoked with --epoch 99, so the checkpoint is
  # renamed to match.
  mv pretrained.pt epoch-99.pt

  # Test wavs are stored locally under short names: 0.wav, 1.wav, 2.wav.
  local idx
  for idx in 0 1 2; do
    curl -SL -o "${idx}.wav" "$hf_base/test_wavs/DEV_T000000000${idx}.wav"
  done

  ls -lh
  popd

  local -a onnx_args=(
    --dynamic-batch 0
    --enable-int8-quantization 0
    --tokens "$d/tokens.txt"
    --use-averaged-model 0
    --epoch 99
    --avg 1
    --exp-dir "$d"
    --use-ctc 0
    --use-transducer 1
    --chunk-size 32
    --left-context-frames 128
    --causal 1
  )
  ./zipformer/export-onnx-streaming.py "${onnx_args[@]}"

  # Common part of the exported ONNX file names.
  local stem=epoch-99-avg-1-chunk-32-left-128

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-multi-zh-hans-2023-12-12
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-${stem}.onnx" \
      --in-decoder "$d/decoder-${stem}.onnx" \
      --in-joiner "$d/joiner-${stem}.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"

    # The unpacked directory is no longer needed once the tarball is moved.
    rm -rf "$dst"
  done
}
# Run the export defined above.
export_2023_11_05

View File

@ -1,13 +0,0 @@
#!/usr/bin/env bash
# This script assumes that test-clean and test-other are downloaded
# to egs/librispeech/ASR/download/LibriSpeech and generates manifest
# files in egs/librispeech/ASR/data/manifests
set -e

cd egs/librispeech/ASR

# Link the pre-downloaded corpus into place unless something named
# "download" already exists here.
if [ ! -e download ]; then
  ln -s ~/tmp/download .
fi

manifest_dir=data/manifests
mkdir -p "$manifest_dir"

lhotse prepare librispeech \
  -j 2 \
  -p test-clean \
  -p test-other \
  ./download/LibriSpeech \
  "$manifest_dir"

ls -lh "$manifest_dir"

View File

@ -1,62 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/gigaspeech/ASR

repo_url=https://huggingface.co/wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"

# Decoding only runs when the event is "schedule" or "workflow_dispatch", or
# when the label is "run-decode".
if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_NAME}" == "workflow_dispatch" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  exp_dir=pruned_transducer_stateless2/exp
  fbank_url=https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank

  # data/ and data/fbank are listed and entered below; create them up front
  # so that `set -e` does not abort the script when they are missing.
  mkdir -p "$exp_dir" data/fbank
  ln -s "$PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt" "$exp_dir/epoch-999.pt"
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh data/lang_bpe_500
  ls -lh data/fbank
  ls -lh "$exp_dir"

  pushd data/fbank
  for fbank_file in cuts_DEV.jsonl.gz cuts_TEST.jsonl.gz feats_DEV.lca feats_TEST.lca; do
    curl -SL -O "$fbank_url/$fbank_file"
  done
  # Extra names with a gigaspeech_ prefix that point at the downloaded cuts.
  ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
  ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
  popd

  log "Decoding dev and test"

  # use a small value for decoding with CPU
  max_duration=100

  # Test only greedy_search to reduce CI running time
  # for method in greedy_search fast_beam_search modified_beam_search; do
  for method in greedy_search; do
    log "Decoding with $method"

    ./pruned_transducer_stateless2/decode.py \
      --decoding-method "$method" \
      --epoch 999 \
      --avg 1 \
      --max-duration "$max_duration" \
      --exp-dir "$exp_dir"
  done

  rm "$exp_dir"/*.pt
fi

View File

@ -1,172 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/gigaspeech/ASR

repo_url=https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17

log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=$(basename "$repo_url")

# Inputs shared by the export and decoding checks below.
tokens=$repo/data/lang_bpe_500/tokens.txt
test_wavs=(
  "$repo/test_wavs/1089-134686-0001.wav"
  "$repo/test_wavs/1221-135766-0001.wav"
  "$repo/test_wavs/1221-135766-0002.wav"
)

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
# The clone skipped LFS content; fetch only the files that are needed.
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "data/lang_bpe_500/tokens.txt"
git lfs pull --include "exp/jit_script.pt"
git lfs pull --include "exp/pretrained.pt"
# -f: a missing file must not abort the script under `set -e`.
rm -f epoch-30.pt
ln -s pretrained.pt epoch-30.pt
rm -f ./*.onnx
ls -lh
popd

log "----------------------------------------"
log "Export ONNX transducer models "
log "----------------------------------------"

./zipformer/export-onnx.py \
  --tokens "$tokens" \
  --use-averaged-model 0 \
  --epoch 30 \
  --avg 1 \
  --exp-dir "$repo/exp"

ls -lh "$repo/exp"

log "------------------------------------------------------------"
log "Test exported ONNX transducer models (Python code)          "
log "------------------------------------------------------------"

log "test fp32"
./zipformer/onnx_pretrained.py \
  --encoder-model-filename "$repo/exp/encoder-epoch-30-avg-1.onnx" \
  --decoder-model-filename "$repo/exp/decoder-epoch-30-avg-1.onnx" \
  --joiner-model-filename "$repo/exp/joiner-epoch-30-avg-1.onnx" \
  --tokens "$tokens" \
  "${test_wavs[@]}"

log "test int8"
./zipformer/onnx_pretrained.py \
  --encoder-model-filename "$repo/exp/encoder-epoch-30-avg-1.int8.onnx" \
  --decoder-model-filename "$repo/exp/decoder-epoch-30-avg-1.onnx" \
  --joiner-model-filename "$repo/exp/joiner-epoch-30-avg-1.int8.onnx" \
  --tokens "$tokens" \
  "${test_wavs[@]}"

log "Upload models to huggingface"
git config --global user.name "k2-fsa"
git config --global user.email "xxx@gmail.com"

url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
dst=$(basename "$url")
cp -v "$repo"/exp/*.onnx "$dst"
cp -v "$tokens" "$dst"
cp -v "$repo/data/lang_bpe_500/bpe.model" "$dst"
mkdir -p "$dst/test_wavs"
cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

cd "$dst"
git lfs track "*.onnx"
git add .
# Best effort: a failing commit or push must not abort the remaining checks.
git commit -m "upload model" && git push "https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst" main || true

log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
# Drop the git metadata so it does not end up in the tarball.
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv "$dst.tar.bz2" "$dst"
ls -lh
mv -v "$dst.tar.bz2" ../../../

log "Export to torchscript model"
./zipformer/export.py \
  --exp-dir "$repo/exp" \
  --use-averaged-model false \
  --tokens "$tokens" \
  --epoch 30 \
  --avg 1 \
  --jit 1

ls -lh "$repo"/exp/*.pt

log "Decode with models exported by torch.jit.script()"

./zipformer/jit_pretrained.py \
  --tokens "$tokens" \
  --nn-model-filename "$repo/exp/jit_script.pt" \
  "${test_wavs[@]}"

for method in greedy_search modified_beam_search fast_beam_search; do
  log "$method"

  ./zipformer/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$tokens" \
    "${test_wavs[@]}"
done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"

# Decoding only runs when the event is "schedule" or "workflow_dispatch", or
# when the label is "run-decode".
if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_NAME}" == "workflow_dispatch" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  mkdir -p zipformer/exp
  ln -s "$PWD/$repo/exp/pretrained.pt" zipformer/exp/epoch-30.pt
  mkdir -p data
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh zipformer/exp

  mkdir -p data/fbank
  pushd data/fbank
  fbank_url=https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank
  for fbank_file in cuts_DEV.jsonl.gz cuts_TEST.jsonl.gz feats_DEV.lca feats_TEST.lca; do
    curl -SL -O "$fbank_url/$fbank_file"
  done
  # Extra names with a gigaspeech_ prefix that point at the downloaded cuts.
  ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
  ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
  popd

  # The downloaded sets are GigaSpeech DEV and TEST; the previous message
  # named the LibriSpeech test sets.
  log "Decoding dev and test"

  # use a small value for decoding with CPU
  max_duration=100

  for method in greedy_search; do
    log "Decoding with $method"

    ./zipformer/decode.py \
      --decoding-method "$method" \
      --epoch 30 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration "$max_duration" \
      --exp-dir zipformer/exp
  done

  rm zipformer/exp/*.pt
fi

View File

@ -1,191 +0,0 @@
#!/usr/bin/env bash
#
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/librispeech/ASR

repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")
abs_repo=$(realpath "$repo")

# Inputs shared by the export and decoding checks below.
tokens=$repo/data/lang_bpe_500/tokens.txt
test_wavs=(
  "$repo/test_wavs/1089-134686-0001.wav"
  "$repo/test_wavs/1221-135766-0001.wav"
  "$repo/test_wavs/1221-135766-0002.wav"
)

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
# The same checkpoint is made available under the two names used below.
for link_name in pretrained.pt epoch-99.pt; do
  ln -s pretrained-iter-468000-avg-16.pt "$link_name"
done
popd

log "Test exporting with torch.jit.trace()"

./lstm_transducer_stateless2/export.py \
  --exp-dir "$repo/exp" \
  --tokens "$tokens" \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --jit-trace 1

log "Decode with models exported by torch.jit.trace()"

./lstm_transducer_stateless2/jit_pretrained.py \
  --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
  --encoder-model-filename "$repo/exp/encoder_jit_trace.pt" \
  --decoder-model-filename "$repo/exp/decoder_jit_trace.pt" \
  --joiner-model-filename "$repo/exp/joiner_jit_trace.pt" \
  "${test_wavs[@]}"

for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"

  ./lstm_transducer_stateless2/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame "$sym" \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$tokens" \
    "${test_wavs[@]}"
done

for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"

  ./lstm_transducer_stateless2/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$tokens" \
    "${test_wavs[@]}"
done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"

# --- Label "shallow-fusion": decode with RNN-LM shallow fusion ---
if [[ "${GITHUB_EVENT_LABEL_NAME}" == "shallow-fusion" ]]; then
  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$lm_repo_url"
  lm_repo=$(basename "$lm_repo_url")

  pushd "$lm_repo"
  git lfs pull --include "exp/pretrained.pt"
  # decode.py is called with --lm-epoch 88 below.
  mv exp/pretrained.pt exp/epoch-88.pt
  popd

  mkdir -p lstm_transducer_stateless2/exp
  ln -sf "$PWD/$repo/exp/pretrained.pt" lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh lstm_transducer_stateless2/exp

  log "Decoding test-clean and test-other with RNN LM"

  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 999 \
    --avg 1 \
    --exp-dir lstm_transducer_stateless2/exp \
    --max-duration 600 \
    --decoding-method modified_beam_search_lm_shallow_fusion \
    --beam 4 \
    --use-shallow-fusion 1 \
    --lm-type rnn \
    --lm-exp-dir "$lm_repo/exp" \
    --lm-epoch 88 \
    --lm-avg 1 \
    --lm-scale 0.3 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1
fi

# --- Label "LODR": decode with the RNN-LM plus a bi-gram LM ---
if [[ "${GITHUB_EVENT_LABEL_NAME}" == "LODR" ]]; then
  bigram_repo_url=https://huggingface.co/marcoyang/librispeech_bigram
  log "Download bi-gram LM from ${bigram_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$bigram_repo_url"
  bigramlm_repo=$(basename "$bigram_repo_url")

  pushd "$bigramlm_repo"
  git lfs pull --include "2gram.fst.txt"
  # abs_repo is absolute, so the copy works from inside the bi-gram clone.
  cp 2gram.fst.txt "$abs_repo/data/lang_bpe_500/."
  popd

  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$lm_repo_url"
  lm_repo=$(basename "$lm_repo_url")

  pushd "$lm_repo"
  git lfs pull --include "exp/pretrained.pt"
  mv exp/pretrained.pt exp/epoch-88.pt
  popd

  mkdir -p lstm_transducer_stateless2/exp
  ln -sf "$PWD/$repo/exp/pretrained.pt" lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh lstm_transducer_stateless2/exp

  log "Decoding test-clean and test-other"

  # NOTE(review): this call passes --rnn-lm-avg whereas the shallow-fusion
  # call above passes --lm-avg; confirm which spelling decode.py accepts.
  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 999 \
    --avg 1 \
    --exp-dir lstm_transducer_stateless2/exp \
    --max-duration 600 \
    --decoding-method modified_beam_search_LODR \
    --beam 4 \
    --use-shallow-fusion 1 \
    --lm-type rnn \
    --lm-exp-dir "$lm_repo/exp" \
    --lm-scale 0.4 \
    --lm-epoch 88 \
    --rnn-lm-avg 1 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1 \
    --tokens-ngram 2 \
    --ngram-lm-scale -0.16
fi

# --- Events "schedule" / "workflow_dispatch": plain decoding ---
if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
  mkdir -p lstm_transducer_stateless2/exp
  ln -s "$PWD/$repo/exp/pretrained.pt" lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/

  ls -lh data
  ls -lh lstm_transducer_stateless2/exp

  log "Decoding test-clean and test-other"

  # use a small value for decoding with CPU
  max_duration=100

  for method in greedy_search fast_beam_search; do
    log "Decoding with $method"

    ./lstm_transducer_stateless2/decode.py \
      --decoding-method "$method" \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration "$max_duration" \
      --exp-dir lstm_transducer_stateless2/exp
  done

  rm lstm_transducer_stateless2/exp/*.pt
fi

View File

@ -1,135 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/multi_zh-hans/ASR

log "==== Test icefall-asr-multi-zh-hans-zipformer-2023-9-2 ===="
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
ln -s epoch-20.pt epoch-99.pt
popd

ls -lh "$repo"/exp/*.pt

# Inputs shared by the decoding runs for this model.
checkpoint=$repo/exp/epoch-99.pt
tokens=$repo/data/lang_bpe_2000/tokens.txt
wavs=(
  "$repo/test_wavs/DEV_T0000000000.wav"
  "$repo/test_wavs/DEV_T0000000001.wav"
  "$repo/test_wavs/DEV_T0000000002.wav"
)

./zipformer/pretrained.py \
  --checkpoint "$checkpoint" \
  --tokens "$tokens" \
  --method greedy_search \
  "${wavs[@]}"

for method in modified_beam_search fast_beam_search; do
  log "$method"

  ./zipformer/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$checkpoint" \
    --tokens "$tokens" \
    "${wavs[@]}"
done

# Remove the clone before the next model is downloaded.
rm -rf "$repo"

log "==== Test icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24 ===="
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24/

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
ln -s epoch-20.pt epoch-99.pt
popd

ls -lh "$repo"/exp/*.pt

# $repo changed, so the shared inputs are rebuilt for this model.
checkpoint=$repo/exp/epoch-99.pt
tokens=$repo/data/lang_bpe_2000/tokens.txt
wavs=(
  "$repo/test_wavs/DEV_T0000000000.wav"
  "$repo/test_wavs/DEV_T0000000001.wav"
  "$repo/test_wavs/DEV_T0000000002.wav"
)

./zipformer/pretrained.py \
  --checkpoint "$checkpoint" \
  --tokens "$tokens" \
  --use-ctc 1 \
  --method greedy_search \
  "${wavs[@]}"

for method in modified_beam_search fast_beam_search; do
  log "$method"

  ./zipformer/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --use-ctc 1 \
    --checkpoint "$checkpoint" \
    --tokens "$tokens" \
    "${wavs[@]}"
done

rm -rf "$repo"

# The third model belongs to a different recipe directory.
cd ../../../egs/multi_zh_en/ASR

log "==== Test icefall-asr-zipformer-multi-zh-en-2023-11-22 ===="
repo_url=https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

checkpoint=$repo/exp/pretrained.pt
bpe_model=$repo/data/lang_bbpe_2000/bbpe.model
wavs=(
  "$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav"
  "$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav"
  "$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav"
)

./zipformer/pretrained.py \
  --checkpoint "$checkpoint" \
  --bpe-model "$bpe_model" \
  --method greedy_search \
  "${wavs[@]}"

for method in modified_beam_search fast_beam_search; do
  log "$method"

  ./zipformer/pretrained.py \
    --method "$method" \
    --beam-size 4 \
    --checkpoint "$checkpoint" \
    --bpe-model "$bpe_model" \
    "${wavs[@]}"
done

rm -rf "$repo"

View File

@ -1,44 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/swbd/ASR

repo_url=https://huggingface.co/zrjin/icefall-asr-swbd-conformer-ctc-2023-8-26

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

pushd "$repo/exp"
ln -s epoch-98.pt epoch-99.pt
popd

ls -lh "$repo"/exp/*.pt

# Inputs shared by both decoding methods.
lang_dir=$repo/data/lang_bpe_500
wavs=(
  "$repo/test_wavs/1089-134686-0001.wav"
  "$repo/test_wavs/1221-135766-0001.wav"
  "$repo/test_wavs/1221-135766-0002.wav"
)

for method in ctc-decoding 1best; do
  log "$method"

  ./conformer_ctc/pretrained.py \
    --method "$method" \
    --checkpoint "$repo/exp/epoch-99.pt" \
    --tokens "$lang_dir/tokens.txt" \
    --words-file "$lang_dir/words.txt" \
    --HLG "$lang_dir/HLG.pt" \
    --G "$repo/data/lm/G_4_gram.pt" \
    "${wavs[@]}"
done

View File

@ -1,119 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/wenetspeech/ASR

repo_url=https://huggingface.co/luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
repo=$(basename "$repo_url")

# Inputs shared by the export and decoding checks below.
exp_dir=$repo/exp
lang_dir=$repo/data/lang_char
tokens=$lang_dir/tokens.txt
wavs=(
  "$repo/test_wavs/DEV_T0000000000.wav"
  "$repo/test_wavs/DEV_T0000000001.wav"
  "$repo/test_wavs/DEV_T0000000002.wav"
)

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

pushd "$exp_dir"
# The same checkpoint is made available under the two names used below.
for link_name in pretrained.pt epoch-99.pt; do
  ln -s pretrained_epoch_10_avg_2.pt "$link_name"
done
popd

log "Test exporting to ONNX format"

./pruned_transducer_stateless2/export-onnx.py \
  --exp-dir "$exp_dir" \
  --tokens "$tokens" \
  --epoch 99 \
  --avg 1

log "Export to torchscript model"

# Two exports with the same options: first --jit 1, then --jit-trace 1.
for jit_flag in --jit --jit-trace; do
  ./pruned_transducer_stateless2/export.py \
    --exp-dir "$exp_dir" \
    --tokens "$tokens" \
    --epoch 99 \
    --avg 1 \
    "$jit_flag" 1
done

ls -lh "$exp_dir"/*.onnx
ls -lh "$exp_dir"/*.pt

log "Decode with ONNX models"

# onnx_check.py is given the epoch-10-avg-2 files, not the epoch-99-avg-1
# files named in the onnx_pretrained.py call below.
./pruned_transducer_stateless2/onnx_check.py \
  --jit-filename "$exp_dir/cpu_jit.pt" \
  --onnx-encoder-filename "$exp_dir/encoder-epoch-10-avg-2.onnx" \
  --onnx-decoder-filename "$exp_dir/decoder-epoch-10-avg-2.onnx" \
  --onnx-joiner-filename "$exp_dir/joiner-epoch-10-avg-2.onnx" \
  --onnx-joiner-encoder-proj-filename "$exp_dir/joiner_encoder_proj-epoch-10-avg-2.onnx" \
  --onnx-joiner-decoder-proj-filename "$exp_dir/joiner_decoder_proj-epoch-10-avg-2.onnx"

./pruned_transducer_stateless2/onnx_pretrained.py \
  --tokens "$tokens" \
  --encoder-model-filename "$exp_dir/encoder-epoch-99-avg-1.onnx" \
  --decoder-model-filename "$exp_dir/decoder-epoch-99-avg-1.onnx" \
  --joiner-model-filename "$exp_dir/joiner-epoch-99-avg-1.onnx" \
  "${wavs[@]}"

log "Decode with models exported by torch.jit.trace()"

# First the *_jit_trace.pt models, then the *_jit_script.pt models.
for jit_kind in trace script; do
  ./pruned_transducer_stateless2/jit_pretrained.py \
    --tokens "$tokens" \
    --encoder-model-filename "$exp_dir/encoder_jit_${jit_kind}.pt" \
    --decoder-model-filename "$exp_dir/decoder_jit_${jit_kind}.pt" \
    --joiner-model-filename "$exp_dir/joiner_jit_${jit_kind}.pt" \
    "${wavs[@]}"
done

for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"

  ./pruned_transducer_stateless2/pretrained.py \
    --checkpoint "$exp_dir/epoch-99.pt" \
    --lang-dir "$lang_dir" \
    --decoding-method greedy_search \
    --max-sym-per-frame "$sym" \
    "${wavs[@]}"
done

for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"

  ./pruned_transducer_stateless2/pretrained.py \
    --decoding-method "$method" \
    --beam-size 4 \
    --checkpoint "$exp_dir/epoch-99.pt" \
    --lang-dir "$lang_dir" \
    "${wavs[@]}"
done

View File

@ -1,230 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logger taken from espnet: prints the date, then
  # "(file:line:function)" of the caller, then the message.
  local caller_file=${BASH_SOURCE[1]}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # -e is kept so that backslash escapes in the message are expanded.
  echo -e "${stamp} (${caller_file##*/}:${caller_line}:${caller_func}) $*"
}
# Everything below runs from the LibriSpeech ASR recipe directory.
pushd egs/librispeech/ASR

log "Install ncnn and pnnx"

# We are using a modified ncnn here. Will try to merge it to the official repo
# of ncnn
git clone https://github.com/csukuangfj/ncnn
pushd ncnn
git submodule init
git submodule update python/pybind11

# Build the ncnn Python wheel and install it into the current environment.
python3 setup.py bdist_wheel
ls -lh dist/
pip install dist/*.whl

# Build pnnx, pointing CMake at the interpreter that `python3` resolves to.
cd tools/pnnx
mkdir -p build
cd build

echo "which python3"
command -v python3
#/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3

# Quoted so a path containing whitespace is passed to CMake as one argument.
cmake -D Python3_EXECUTABLE="$(command -v python3)" ..
make -j4 pnnx

# Smoke-run the freshly built binary; a non-zero exit status is tolerated.
./src/pnnx || echo "pass"

# Back to the directory that was current before `pushd ncnn`.
popd

export PATH="$PWD/ncnn/tools/pnnx/build/src:$PATH"
log "=========================================================================="
# Conv-Emformer transducer: download the checkpoint, export it for ncnn,
# convert the exported modules with pnnx and decode one test utterance.
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained-epoch-30-avg-10-averaged.pt exp/epoch-99.pt
popd

log "Export via torch.jit.trace()"

export_args=(
  --exp-dir "$repo/exp"
  --epoch 99
  --avg 1
  --use-averaged-model 0
  --tokens "$repo/data/lang_bpe_500/tokens.txt"
  --num-encoder-layers 12
  --chunk-length 32
  --cnn-module-kernel 31
  --left-context-length 32
  --right-context-length 8
  --memory-size 32
)
./conv_emformer_transducer_stateless2/export-for-ncnn.py "${export_args[@]}"

# Run pnnx on each exported module and collect the matching .param/.bin
# arguments for the decoding script, in encoder/decoder/joiner order.
ncnn_args=()
for part in encoder decoder joiner; do
  pnnx "$repo/exp/${part}_jit_trace-pnnx.pt"
  ncnn_args+=(
    "--${part}-param-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.param"
    "--${part}-bin-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.bin"
  )
done

python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  "${ncnn_args[@]}" \
  "$repo/test_wavs/1089-134686-0001.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# LSTM transducer: download the checkpoint, export it for ncnn, convert with
# pnnx, then decode one utterance with both ncnn decoding scripts.
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained-iter-468000-avg-16.pt exp/epoch-99.pt
popd

log "Export via torch.jit.trace()"

./lstm_transducer_stateless2/export-for-ncnn.py \
  --exp-dir "$repo/exp" \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0

# Shared decoding arguments: tokens first, then .param/.bin pairs in
# encoder/decoder/joiner order.
ncnn_args=(--tokens "$repo/data/lang_bpe_500/tokens.txt")
for part in encoder decoder joiner; do
  pnnx "$repo/exp/${part}_jit_trace-pnnx.pt"
  ncnn_args+=(
    "--${part}-param-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.param"
    "--${part}-bin-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.bin"
  )
done

# Streaming decoder first, then the non-streaming one, on the same utterance.
for decode_script in streaming-ncnn-decode.py ncnn-decode.py; do
  python3 "./lstm_transducer_stateless2/${decode_script}" \
    "${ncnn_args[@]}" \
    "$repo/test_wavs/1089-134686-0001.wav"
done

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# Streaming zipformer (pruned_transducer_stateless7_streaming): export for
# ncnn, convert with pnnx and decode one test utterance.
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "exp/pretrained.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained.pt exp/epoch-99.pt
popd

export_args=(
  --tokens "$repo/data/lang_bpe_500/tokens.txt"
  --exp-dir "$repo/exp"
  --use-averaged-model 0
  --epoch 99
  --avg 1
  --decode-chunk-len 32
  --num-encoder-layers "2,4,3,2,4"
  --feedforward-dims "1024,1024,2048,2048,1024"
  --nhead "8,8,8,8,8"
  --encoder-dims "384,384,384,384,384"
  --attention-dims "192,192,192,192,192"
  --encoder-unmasked-dims "256,256,256,256,256"
  --zipformer-downsampling-factors "1,2,4,8,2"
  --cnn-module-kernels "31,31,31,31,31"
  --decoder-dim 512
  --joiner-dim 512
)
./pruned_transducer_stateless7_streaming/export-for-ncnn.py "${export_args[@]}"

# Run pnnx on each exported module and collect the matching .param/.bin
# arguments for the decoding script, in encoder/decoder/joiner order.
ncnn_args=()
for part in encoder decoder joiner; do
  pnnx "$repo/exp/${part}_jit_trace-pnnx.pt"
  ncnn_args+=(
    "--${part}-param-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.param"
    "--${part}-bin-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.bin"
  )
done

python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  "${ncnn_args[@]}" \
  "$repo/test_wavs/1089-134686-0001.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# Chinese-English mixed streaming zipformer: export for ncnn with the -zh
# export script, convert with pnnx and decode one test utterance.
repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
# Fetch only the LFS objects this test pulls: three lexicon files and the
# checkpoint.
for lfs_path in \
  "data/lang_char_bpe/L.pt" \
  "data/lang_char_bpe/L_disambig.pt" \
  "data/lang_char_bpe/Linv.pt" \
  "exp/pretrained.pt"; do
  git lfs pull --include "$lfs_path"
done
# Relative symlink inside exp/; presumably the file that `--epoch 9999` loads.
ln -s pretrained.pt exp/epoch-9999.pt
popd

export_args=(
  --tokens "$repo/data/lang_char_bpe/tokens.txt"
  --exp-dir "$repo/exp"
  --use-averaged-model 0
  --epoch 9999
  --avg 1
  --decode-chunk-len 32
  --num-encoder-layers "2,4,3,2,4"
  --feedforward-dims "1024,1024,1536,1536,1024"
  --nhead "8,8,8,8,8"
  --encoder-dims "384,384,384,384,384"
  --attention-dims "192,192,192,192,192"
  --encoder-unmasked-dims "256,256,256,256,256"
  --zipformer-downsampling-factors "1,2,4,8,2"
  --cnn-module-kernels "31,31,31,31,31"
  --decoder-dim 512
  --joiner-dim 512
)
./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py "${export_args[@]}"

# Run pnnx on each exported module and collect the matching .param/.bin
# arguments for the decoding script, in encoder/decoder/joiner order.
ncnn_args=()
for part in encoder decoder joiner; do
  pnnx "$repo/exp/${part}_jit_trace-pnnx.pt"
  ncnn_args+=(
    "--${part}-param-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.param"
    "--${part}-bin-filename" "$repo/exp/${part}_jit_trace-pnnx.ncnn.bin"
  )
done

python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
  --tokens "$repo/data/lang_char_bpe/tokens.txt" \
  "${ncnn_args[@]}" \
  "$repo/test_wavs/0.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"

View File

@ -1,466 +0,0 @@
#!/usr/bin/env bash
set -e
log() {
  # Timestamped logging helper (adapted from espnet). The prefix names the
  # calling script, the line of the call and the calling function.
  local src_file=${BASH_SOURCE[1]##*/}
  local call_site="(${src_file}:${BASH_LINENO[0]}:${FUNCNAME[1]})"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') ${call_site} $*"
}
cd egs/librispeech/ASR

log "=========================================================================="
# Non-streaming zipformer: TorchScript export, ONNX export, comparison of the
# two with onnx_check.py, then decoding with the ONNX models.
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "exp/pretrained.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained.pt exp/epoch-99.pt
popd

exp_dir=$repo/exp
tokens=$repo/data/lang_bpe_500/tokens.txt
onnx_suffix=epoch-99-avg-1.onnx

log "Export via torch.jit.script()"
./zipformer/export.py \
  --use-averaged-model 0 \
  --exp-dir "$exp_dir" \
  --tokens "$tokens" \
  --epoch 99 \
  --avg 1 \
  --jit 1

log "Test export to ONNX format"
onnx_export_args=(
  --tokens "$tokens"
  --use-averaged-model 0
  --epoch 99
  --avg 1
  --exp-dir "$exp_dir"
  --num-encoder-layers "2,2,3,4,3,2"
  --downsampling-factor "1,2,4,8,4,2"
  --feedforward-dim "512,768,1024,1536,1024,768"
  --num-heads "4,4,4,8,4,4"
  --encoder-dim "192,256,384,512,384,256"
  --query-head-dim 32
  --value-head-dim 12
  --pos-head-dim 4
  --pos-dim 48
  --encoder-unmasked-dim "192,192,256,256,256,192"
  --cnn-module-kernel "31,31,15,15,15,31"
  --decoder-dim 512
  --joiner-dim 512
  --causal False
  --chunk-size "16,32,64,-1"
  --left-context-frames "64,128,256,-1"
)
./zipformer/export-onnx.py "${onnx_export_args[@]}"

ls -lh "$exp_dir"

log "Run onnx_check.py"
./zipformer/onnx_check.py \
  --jit-filename "$exp_dir/jit_script.pt" \
  --onnx-encoder-filename "$exp_dir/encoder-$onnx_suffix" \
  --onnx-decoder-filename "$exp_dir/decoder-$onnx_suffix" \
  --onnx-joiner-filename "$exp_dir/joiner-$onnx_suffix"

log "Run onnx_pretrained.py"
./zipformer/onnx_pretrained.py \
  --encoder-model-filename "$exp_dir/encoder-$onnx_suffix" \
  --decoder-model-filename "$exp_dir/decoder-$onnx_suffix" \
  --joiner-model-filename "$exp_dir/joiner-$onnx_suffix" \
  --tokens "$tokens" \
  "$repo/test_wavs/1089-134686-0001.wav"

rm -rf "$repo"
# Streaming zipformer: export to ONNX with a fixed chunk size / left context
# and decode one utterance with the streaming ONNX script.
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "exp/pretrained.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained.pt exp/epoch-99.pt
popd

exp_dir=$repo/exp
tokens=$repo/data/lang_bpe_500/tokens.txt
# File-name suffix of the ONNX models that the decoding step below reads.
onnx_suffix=epoch-99-avg-1-chunk-16-left-64.onnx

log "Test export streaming model to ONNX format"
onnx_export_args=(
  --tokens "$tokens"
  --use-averaged-model 0
  --epoch 99
  --avg 1
  --exp-dir "$exp_dir"
  --num-encoder-layers "2,2,3,4,3,2"
  --downsampling-factor "1,2,4,8,4,2"
  --feedforward-dim "512,768,1024,1536,1024,768"
  --num-heads "4,4,4,8,4,4"
  --encoder-dim "192,256,384,512,384,256"
  --query-head-dim 32
  --value-head-dim 12
  --pos-head-dim 4
  --pos-dim 48
  --encoder-unmasked-dim "192,192,256,256,256,192"
  --cnn-module-kernel "31,31,15,15,15,31"
  --decoder-dim 512
  --joiner-dim 512
  --causal True
  --chunk-size 16
  --left-context-frames 64
)
./zipformer/export-onnx-streaming.py "${onnx_export_args[@]}"

ls -lh "$exp_dir"

log "Run onnx_pretrained-streaming.py"
./zipformer/onnx_pretrained-streaming.py \
  --encoder-model-filename "$exp_dir/encoder-$onnx_suffix" \
  --decoder-model-filename "$exp_dir/decoder-$onnx_suffix" \
  --joiner-model-filename "$exp_dir/joiner-$onnx_suffix" \
  --tokens "$tokens" \
  "$repo/test_wavs/1089-134686-0001.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# pruned_transducer_stateless7_streaming: traced TorchScript export, ONNX
# export, comparison with onnx_check.py, then decoding with the ONNX models.
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained.pt exp/epoch-99.pt
popd

# Options passed unchanged to both export scripts.
common_args=(
  --use-averaged-model 0
  --epoch 99
  --avg 1
  --decode-chunk-len 32
  --exp-dir "$repo/exp/"
)

log "Export via torch.jit.trace()"
./pruned_transducer_stateless7_streaming/jit_trace_export.py \
  --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
  "${common_args[@]}"

log "Test exporting to ONNX format"
./pruned_transducer_stateless7_streaming/export-onnx.py \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  "${common_args[@]}"

ls -lh "$repo/exp"

# Build the per-module file arguments once, in encoder/decoder/joiner order.
jit_args=()
onnx_args=()
model_args=()
for part in encoder decoder joiner; do
  jit_args+=("--jit-${part}-filename" "$repo/exp/${part}_jit_trace.pt")
  onnx_args+=("--onnx-${part}-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
  model_args+=("--${part}-model-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
done

log "Run onnx_check.py"
./pruned_transducer_stateless7_streaming/onnx_check.py \
  "${jit_args[@]}" \
  "${onnx_args[@]}"

log "Run onnx_pretrained.py"
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
  "${model_args[@]}" \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  "$repo/test_wavs/1089-134686-0001.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# pruned_transducer_stateless3: TorchScript export, ONNX export, comparison
# with onnx_check.py, then decoding three utterances with the ONNX models.
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 9999` loads.
ln -s pretrained-iter-1224000-avg-14.pt exp/epoch-9999.pt
popd

tokens=$repo/data/lang_bpe_500/tokens.txt
# Options passed unchanged to both export scripts.
common_args=(
  --tokens "$tokens"
  --epoch 9999
  --avg 1
  --exp-dir "$repo/exp/"
)

log "Export via torch.jit.script()"
./pruned_transducer_stateless3/export.py "${common_args[@]}" --jit 1

log "Test exporting to ONNX format"
./pruned_transducer_stateless3/export-onnx.py "${common_args[@]}"

ls -lh "$repo/exp"

# Build the per-module file arguments once, in encoder/decoder/joiner order.
onnx_args=()
model_args=()
for part in encoder decoder joiner; do
  onnx_args+=("--onnx-${part}-filename" "$repo/exp/${part}-epoch-9999-avg-1.onnx")
  model_args+=("--${part}-model-filename" "$repo/exp/${part}-epoch-9999-avg-1.onnx")
done

log "Run onnx_check.py"
./pruned_transducer_stateless3/onnx_check.py \
  --jit-filename "$repo/exp/cpu_jit.pt" \
  "${onnx_args[@]}"

log "Run onnx_pretrained.py"
./pruned_transducer_stateless3/onnx_pretrained.py \
  "${model_args[@]}" \
  --tokens "$tokens" \
  "$repo/test_wavs/1089-134686-0001.wav" \
  "$repo/test_wavs/1221-135766-0001.wav" \
  "$repo/test_wavs/1221-135766-0002.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# pruned_transducer_stateless5: TorchScript export, ONNX export, comparison
# with onnx_check.py, then decoding three utterances with the ONNX models.
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained-epoch-39-avg-7.pt exp/epoch-99.pt
popd

tokens=$repo/data/lang_bpe_500/tokens.txt
# Checkpoint selection and model options passed unchanged to both export
# scripts.
common_args=(
  --tokens "$tokens"
  --epoch 99
  --avg 1
  --use-averaged-model 0
  --exp-dir "$repo/exp"
  --num-encoder-layers 18
  --dim-feedforward 2048
  --nhead 8
  --encoder-dim 512
  --decoder-dim 512
  --joiner-dim 512
)

log "Export via torch.jit.script()"
./pruned_transducer_stateless5/export.py "${common_args[@]}" --jit 1

log "Test exporting to ONNX format"
./pruned_transducer_stateless5/export-onnx.py "${common_args[@]}"

ls -lh "$repo/exp"

# Build the per-module file arguments once, in encoder/decoder/joiner order.
onnx_args=()
model_args=()
for part in encoder decoder joiner; do
  onnx_args+=("--onnx-${part}-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
  model_args+=("--${part}-model-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
done

log "Run onnx_check.py"
./pruned_transducer_stateless5/onnx_check.py \
  --jit-filename "$repo/exp/cpu_jit.pt" \
  "${onnx_args[@]}"

log "Run onnx_pretrained.py"
./pruned_transducer_stateless5/onnx_pretrained.py \
  "${model_args[@]}" \
  --tokens "$tokens" \
  "$repo/test_wavs/1089-134686-0001.wav" \
  "$repo/test_wavs/1221-135766-0001.wav" \
  "$repo/test_wavs/1221-135766-0002.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# pruned_transducer_stateless7: TorchScript export, ONNX export, comparison
# with onnx_check.py, then decoding three utterances with the ONNX models.
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=$(basename "$repo_url")

pushd "$repo"
git lfs pull --include "exp/pretrained.pt"

cd exp
# Presumably the file that `--epoch 99` loads.
ln -s pretrained.pt epoch-99.pt
popd

log "Export via torch.jit.script()"

./pruned_transducer_stateless7/export.py \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir "$repo/exp" \
  --feedforward-dims "1024,1024,2048,2048,1024" \
  --jit 1

log "Test exporting to ONNX format"

./pruned_transducer_stateless7/export-onnx.py \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir "$repo/exp" \
  --feedforward-dims "1024,1024,2048,2048,1024"

ls -lh "$repo/exp"

log "Run onnx_check.py"

./pruned_transducer_stateless7/onnx_check.py \
  --jit-filename "$repo/exp/cpu_jit.pt" \
  --onnx-encoder-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
  --onnx-decoder-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
  --onnx-joiner-filename "$repo/exp/joiner-epoch-99-avg-1.onnx"

log "Run onnx_pretrained.py"

./pruned_transducer_stateless7/onnx_pretrained.py \
  --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
  --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
  --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
  --tokens "$repo/data/lang_bpe_500/tokens.txt" \
  "$repo/test_wavs/1089-134686-0001.wav" \
  "$repo/test_wavs/1221-135766-0001.wav" \
  "$repo/test_wavs/1221-135766-0002.wav"

# Delete the clone and the exported models before the next model is fetched.
rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# Conv-Emformer transducer: export to ONNX and decode one utterance with the
# ONNX models.
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained-epoch-30-avg-10-averaged.pt exp/epoch-99.pt
popd

tokens=$repo/data/lang_bpe_500/tokens.txt

log "Test exporting to ONNX format"
onnx_export_args=(
  --tokens "$tokens"
  --use-averaged-model 0
  --epoch 99
  --avg 1
  --exp-dir "$repo/exp"
  --num-encoder-layers 12
  --chunk-length 32
  --cnn-module-kernel 31
  --left-context-length 32
  --right-context-length 8
  --memory-size 32
)
./conv_emformer_transducer_stateless2/export-onnx.py "${onnx_export_args[@]}"

# Model-file arguments for decoding, in encoder/decoder/joiner order.
model_args=()
for part in encoder decoder joiner; do
  model_args+=("--${part}-model-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
done

log "Run onnx_pretrained.py"
./conv_emformer_transducer_stateless2/onnx_pretrained.py \
  "${model_args[@]}" \
  --tokens "$tokens" \
  "$repo/test_wavs/1221-135766-0001.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"
log "=========================================================================="
# LSTM transducer: traced TorchScript export, ONNX export, comparison with
# onnx_check.py, then decoding one utterance with the ONNX models.
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=${repo_url##*/}

pushd "$repo"
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
# Relative symlink inside exp/; presumably the file that `--epoch 99` loads.
ln -s pretrained-iter-468000-avg-16.pt exp/epoch-99.pt
popd

tokens=$repo/data/lang_bpe_500/tokens.txt
# Checkpoint selection shared by both export scripts (--exp-dir differs by a
# trailing slash between the two calls, so it is passed separately).
ckpt_args=(
  --tokens "$tokens"
  --use-averaged-model 0
  --epoch 99
  --avg 1
)

log "Export via torch.jit.trace()"
./lstm_transducer_stateless2/export.py \
  "${ckpt_args[@]}" \
  --exp-dir "$repo/exp/" \
  --jit-trace 1

log "Test exporting to ONNX format"
./lstm_transducer_stateless2/export-onnx.py \
  "${ckpt_args[@]}" \
  --exp-dir "$repo/exp"

ls -lh "$repo/exp"

# Build the per-module file arguments once, in encoder/decoder/joiner order.
jit_args=()
onnx_args=()
model_args=()
for part in encoder decoder joiner; do
  jit_args+=("--jit-${part}-filename" "$repo/exp/${part}_jit_trace.pt")
  onnx_args+=("--onnx-${part}-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
  model_args+=("--${part}-model-filename" "$repo/exp/${part}-epoch-99-avg-1.onnx")
done

log "Run onnx_check.py"
./lstm_transducer_stateless2/onnx_check.py \
  "${jit_args[@]}" \
  "${onnx_args[@]}"

log "Run onnx_pretrained.py"
./lstm_transducer_stateless2/onnx_pretrained.py \
  "${model_args[@]}" \
  --tokens "$tokens" \
  "$repo/test_wavs/1221-135766-0001.wav"

rm -rf "$repo"
log "--------------------------------------------------------------------------"

View File

@ -1,196 +0,0 @@
#!/usr/bin/env bash
set -ex
python3 -m pip install kaldi-native-fbank soundfile librosa
log() {
  # Write a timestamped line tagged with the caller's script name, the line
  # number of the call and the calling function (adapted from espnet).
  local script_name=${BASH_SOURCE[1]##*/}
  local tag="(${script_name}:${BASH_LINENO[0]}:${FUNCNAME[1]})"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') ${tag} $*"
}
cd egs/wenetspeech/ASR
#https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-small-chinese
function export_2025_03_02() {
  # Download the small streaming WenetSpeech zipformer, export it to ONNX,
  # convert it to RKNN once per target platform, and move one .tar.bz2 per
  # platform to /icefall/. Takes no arguments.
  d=exp_2025_03_02
  mkdir "$d"

  pushd "$d"
  local hf_base=https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main
  curl -SL -O "$hf_base/data/lang_char/tokens.txt"
  curl -SL -O "$hf_base/exp/pretrained.pt"
  mv pretrained.pt epoch-99.pt

  # Test utterances are saved as 0.wav, 1.wav and 2.wav.
  local idx
  for idx in 0 1 2; do
    curl -SL -o "$idx.wav" "$hf_base/test_wavs/DEV_T000000000$idx.wav"
  done
  ls -lh
  popd

  local export_args=(
    --dynamic-batch 0
    --enable-int8-quantization 0
    --tokens "$d/tokens.txt"
    --use-averaged-model 0
    --epoch 99
    --avg 1
    --exp-dir "$d"
    --use-ctc 0
    --use-transducer 1
    --num-encoder-layers 2,2,2,2,2,2
    --feedforward-dim 512,768,768,768,768,768
    --encoder-dim 192,256,256,256,256,256
    --encoder-unmasked-dim 192,192,192,192,192,192
    --chunk-size 32
    --left-context-frames 128
    --causal 1
  )
  ./zipformer/export-onnx-streaming.py "${export_args[@]}"

  local onnx_suffix=epoch-99-avg-1-chunk-32-left-128.onnx
  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-small-zh-2025-03-02
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-$onnx_suffix" \
      --in-decoder "$d/decoder-$onnx_suffix" \
      --in-joiner "$d/joiner-$onnx_suffix" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    # Bundle tokens and test wavs with the converted models.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"

    rm -rf "$dst"
  done
  rm -rf "$d"
}
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-large-chinese
function export_2025_03_03() {
  # Download the streaming WenetSpeech zipformer, export it to ONNX, convert
  # it to RKNN once per target platform, and move one .tar.bz2 per platform
  # to /icefall/. Takes no arguments; exits non-zero (under `set -e`) as soon
  # as any step fails.
  local d=exp_2025_03_03
  local dst platform

  mkdir "$d"
  pushd "$d"
  curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
  curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/pretrained.pt
  mv pretrained.pt epoch-99.pt

  curl -SL -o 0.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000000.wav
  curl -SL -o 1.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000001.wav
  curl -SL -o 2.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000002.wav
  ls -lh
  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-zh-2025-03-03
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    # Bundle tokens and test wavs with the converted models.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    # The archive is listed only before it is moved: once it is in /icefall/
    # it no longer exists in the working directory, so listing it again would
    # fail and abort the whole script under `set -e`.
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"

    rm -rf "$dst"
  done
  rm -rf "$d"
}
function export_2023_06_15() {
  # Download the streaming WenetSpeech zipformer (2023-06-15), export it to
  # ONNX, convert it to RKNN once per target platform, and move one .tar.bz2
  # per platform to /icefall/. Takes no arguments; exits non-zero (under
  # `set -e`) as soon as any step fails.
  local d=exp_2023_06_15
  local dst platform

  mkdir "$d"
  pushd "$d"
  curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
  curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/pretrained.pt
  mv pretrained.pt epoch-99.pt

  curl -SL -o 0.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000000.wav
  curl -SL -o 1.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000001.wav
  curl -SL -o 2.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000002.wav
  ls -lh
  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-zh-2023-06-15
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    # Bundle tokens and test wavs with the converted models.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    # The archive is listed only before it is moved: once it is in /icefall/
    # it no longer exists in the working directory, so listing it again would
    # fail and abort the whole script under `set -e`.
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"

    rm -rf "$dst"
  done

  # Remove the download/export directory so that a later run's `mkdir` does
  # not fail on an existing directory.
  rm -rf "$d"
}
export_2025_03_02
export_2025_03_03
export_2023_06_15

View File

@ -1,86 +0,0 @@
#!/usr/bin/env bash
set -ex
log() {
  # Logging helper adapted from espnet: prefixes the message with the current
  # time and the caller's script name, call line and function name.
  local invoked_from=${BASH_SOURCE[1]##*/}
  local location="(${invoked_from}:${BASH_LINENO[0]}:${FUNCNAME[1]})"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') ${location} $*"
}
cd egs/yesno/ASR

# Full yesno recipe: prepare data, train, decode, then exercise each export
# format (checkpoint, TorchScript, ONNX float32/int8) and the H / HL decoders.

# Graph and word-table arguments shared by the HLG-based decoding scripts.
hlg_args=(
  --HLG ./data/lang_phone/HLG.pt
  --words-file ./data/lang_phone/words.txt
)
# Two utterances used by the HLG-based decoding scripts.
hlg_waves=(
  download/waves_yesno/0_0_0_1_0_0_0_1.wav
  download/waves_yesno/0_0_1_0_0_0_1_0.wav
)
# Three utterances used by the H and HL decoding scripts.
fst_waves=(
  ./download/waves_yesno/0_0_0_1_0_0_0_1.wav
  ./download/waves_yesno/0_0_1_0_0_0_1_0.wav
  ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
)

log "data preparation"
./prepare.sh

log "training"
python3 ./tdnn/train.py

log "decoding"
python3 ./tdnn/decode.py

log "export to pretrained.pt"
python3 ./tdnn/export.py --epoch 14 --avg 2
python3 ./tdnn/pretrained.py \
  --checkpoint ./tdnn/exp/pretrained.pt \
  "${hlg_args[@]}" \
  "${hlg_waves[@]}"

log "Test exporting to torchscript"
python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
python3 ./tdnn/jit_pretrained.py \
  --nn-model ./tdnn/exp/cpu_jit.pt \
  "${hlg_args[@]}" \
  "${hlg_waves[@]}"

log "Test exporting to onnx"
python3 ./tdnn/export_onnx.py --epoch 14 --avg 2

# The float32 and int8 tests differ only in the model file they load.
log "Test float32 model"
python3 ./tdnn/onnx_pretrained.py \
  --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
  "${hlg_args[@]}" \
  "${hlg_waves[@]}"

log "Test int8 model"
python3 ./tdnn/onnx_pretrained.py \
  --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
  "${hlg_args[@]}" \
  "${hlg_waves[@]}"

log "Test decoding with H"
python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
python3 ./tdnn/jit_pretrained_decode_with_H.py \
  --nn-model ./tdnn/exp/cpu_jit.pt \
  --H ./data/lang_phone/H.fst \
  --tokens ./data/lang_phone/tokens.txt \
  "${fst_waves[@]}"

log "Test decoding with HL"
python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
python3 ./tdnn/jit_pretrained_decode_with_HL.py \
  --nn-model ./tdnn/exp/cpu_jit.pt \
  --HL ./data/lang_phone/HL.fst \
  --words ./data/lang_phone/words.txt \
  "${fst_waves[@]}"

log "Show generated files"
ls -lh tdnn/exp
ls -lh data/lang_phone

View File

@ -1,72 +0,0 @@
# CI workflow for the aishell recipe: build a job matrix, then run the aishell
# test script inside the matching icefall CPU docker image.
name: aishell

on:
  push:
    branches:
      - master

  pull_request:
    branches:
      - master

  workflow_dispatch:

# Cancel an in-progress run when a newer one starts for the same ref.
concurrency:
  group: aishell-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'

    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          # The `::set-output` workflow command is deprecated; step outputs
          # are written to the file named by $GITHUB_OUTPUT instead.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"

  aishell:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"

      - name: Run aishell tests
        uses: addnab/docker-run-action@v3
        with:
          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
          # The checked-out workspace is mounted at /icefall in the container.
          options: |
            --volume ${{ github.workspace }}/:/icefall
          shell: bash
          run: |
            export PYTHONPATH=/icefall:$PYTHONPATH
            cd /icefall
            git config --global --add safe.directory /icefall

            .github/scripts/aishell/ASR/run.sh

View File

@ -1,137 +0,0 @@
name: audioset
on:
push:
branches:
- master
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: audioset-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
echo "::set-output name=matrix::${MATRIX}"
audioset:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
ls -lh
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
git config --global --add safe.directory /icefall
.github/scripts/audioset/AT/run.sh
- name: Show model files
shell: bash
run: |
sudo chown -R runner ./model-onnx
ls -lh ./model-onnx
chmod -x ./model-onnx/class_labels_indices.csv
echo "----------"
ls -lh ./model-onnx/*
# Copies the exported *.onnx files, *.csv files and test_wavs from ./model-onnx
# into a fresh clone of the Hugging Face model repo, commits and pushes them.
# Guarded to the python 3.10 / torch 2.3.0 matrix entry on `push` events.
- name: Upload model to huggingface
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
# Start from a clean directory so a retried attempt does not reuse a stale clone
rm -rf huggingface
# Clone without downloading LFS payloads (git-lfs then leaves pointer files)
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface
cd huggingface
git fetch
git pull
git merge -m "merge remote" --ff origin main
cp ../model-onnx/*.onnx ./
cp ../model-onnx/*.csv ./
cp -a ../model-onnx/test_wavs ./
ls -lh
git add .
git status
git commit -m "update models"
git status
# `|| true`: a failed push does not make this command fail
git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true
rm -rf huggingface
- name: Prepare for release
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
shell: bash
run: |
d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
mv ./model-onnx $d
tar cjvf ${d}.tar.bz2 $d
ls -lh
- name: Release exported onnx models
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: sherpa-onnx-*.tar.bz2
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: audio-tagging-models

View File

@ -1,152 +0,0 @@
name: baker_zh
on:
push:
branches:
- master
- baker-matcha-2
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: baker-zh-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
  id: set-matrix
  run: |
    # Generate the build matrix once, print it for debugging, then publish it
    # as the `matrix` output of this step.
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
    echo "${MATRIX}"
    # The `::set-output` workflow command is deprecated; step outputs are
    # written to the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the generator prints single-line JSON - confirm.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
baker_zh:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
ls -lh
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
pip install onnx==1.17.0
pip list
git config --global --add safe.directory /icefall
.github/scripts/baker_zh/TTS/run-matcha.sh
- name: display files
shell: bash
run: |
ls -lh
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-2
path: ./model-steps-2.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-3
path: ./model-steps-3.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-4
path: ./model-steps-4.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-5
path: ./model-steps-5.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-6
path: ./model-steps-6.onnx
- name: Upload models to huggingface
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
d=matcha-icefall-zh-baker
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
cp -av $d/* hf/
pushd hf
git add .
git config --global user.name "csukuangfj"
git config --global user.email "csukuangfj@gmail.com"
git config --global lfs.allowincompletepush true
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
popd
- name: Release exported onnx models
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: matcha-icefall-*.tar.bz2
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: tts-models

View File

@ -1,81 +0,0 @@
name: build-cpu-docker
on:
workflow_dispatch:
concurrency:
group: build-cpu-docker-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
  id: set-matrix
  run: |
    # Generate the build matrix once, print it for debugging, then publish it
    # as the `matrix` output of this step.
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
    echo "${MATRIX}"
    # The `::set-output` workflow command is deprecated; step outputs are
    # written to the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the generator prints single-line JSON - confirm.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
build-cpu-docker:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
# refer to https://github.com/actions/checkout
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
- name: 'Login to GitHub Container Registry'
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# Builds the CPU image for one matrix entry from .github/scripts/docker and
# pushes it to ghcr.io under the owner's `icefall` repository.
- name: Build docker Image
  shell: bash
  run: |
    cd .github/scripts/docker

    # Versions taken from the build matrix
    version=${{ matrix.version }}
    torch_version=${{ matrix.torch-version }}
    torchaudio_version=${{ matrix.torchaudio-version }}
    echo "torch_version: $torch_version"
    echo "torchaudio_version: $torchaudio_version"

    # Image reference: ghcr.io/<owner>/icefall:cpu-py<python>-torch<torch>-v<version>
    tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
    echo "tag: $tag"

    docker build \
      --tag $tag \
      --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
      --build-arg TORCH_VERSION=$torch_version \
      --build-arg TORCHAUDIO_VERSION=$torchaudio_version \
      --build-arg K2_VERSION=${{ matrix.k2-version }} \
      --build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \
      .

    docker image ls
    docker push $tag

View File

@ -1,74 +0,0 @@
# Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# refer to https://github.com/actions/starter-workflows/pull/47/files
# You can access it at https://k2-fsa.github.io/icefall/
name: Generate doc
on:
push:
branches:
- master
- doc
pull_request:
types: [labeled]
workflow_dispatch:
concurrency:
group: build_doc-${{ github.ref }}
cancel-in-progress: true
jobs:
build-doc:
# if: github.event.label.name == 'doc' || github.event_name == 'push'
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.8"]
steps:
# refer to https://github.com/actions/checkout
- uses: actions/checkout@v4
with:
fetch-depth: 0
# Installs the interpreter selected by the job matrix.
# Uses setup-python v5 (the version the ksponspeech workflow already uses)
# instead of the outdated v2; the `python-version` input is unchanged.
- name: Setup Python ${{ matrix.python-version }}
  uses: actions/setup-python@v5
  with:
    python-version: ${{ matrix.python-version }}
- name: Display Python version
run: python -c "import sys; print(sys.version)"
- name: Build doc
shell: bash
run: |
.github/scripts/generate-piper-phonemize-page.py
cd docs
python3 -m pip install -r ./requirements.txt
make html
touch build/html/.nojekyll
cp -v ../piper_phonemize.html ./build/html/
- name: Deploy
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs/build/html
publish_branch: gh-pages

View File

@ -1,84 +0,0 @@
# see also
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Build docker image
on:
workflow_dispatch:
concurrency:
group: build_docker-${{ github.ref }}
cancel-in-progress: true
jobs:
build-docker-image:
name: ${{ matrix.image }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
image: ["torch2.4.1-cuda12.4", "torch2.4.1-cuda12.1", "torch2.4.1-cuda11.8", "torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
steps:
# refer to https://github.com/actions/checkout
# Full-history checkout (fetch-depth: 0). Uses checkout v4, matching the other
# workflows in this repository, instead of the outdated v2.
- uses: actions/checkout@v4
  with:
    fetch-depth: 0
- name: Rename
shell: bash
run: |
image=${{ matrix.image }}
mv -v ./docker/$image.dockerfile ./Dockerfile
- name: Free space
shell: bash
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
# Deletes large preinstalled directories on the runner to reclaim disk space.
- name: Free more space
shell: bash
run: |
# https://github.com/orgs/community/discussions/25678
cd /opt
# Remove every top-level entry of /opt except containerd, actionarchivecache,
# runner and runner-cache.
find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
sudo rm -rf /usr/share/dotnet
sudo rm -rf "/usr/local/share/boost"
# NOTE(review): if AGENT_TOOLSDIRECTORY is unset this expands to `rm -rf ""` - confirm it is always set on the runner
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: false
swap-storage: true
- name: Check space
shell: bash
run: |
df -h
- name: Log in to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Build and push
uses: docker/build-push-action@v4
with:
context: .
file: ./Dockerfile
push: true
tags: k2fsa/icefall:${{ matrix.image }}

View File

@ -1,167 +0,0 @@
name: ksponspeech
on:
push:
branches:
- ksponspeech
workflow_dispatch:
jobs:
ksponspeech:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
# Restores/saves the kaldifeat build under ~/tmp/kaldifeat; the following
# install step is skipped when `steps.my-cache.outputs.cache-hit` is 'true'.
- name: Cache kaldifeat
  id: my-cache
  # actions/cache v1-v2 have been retired by GitHub; v4 accepts the same
  # `path` and `key` inputs and exposes the same `cache-hit` output.
  uses: actions/cache@v4
  with:
    path: |
      ~/tmp/kaldifeat
    key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Test
shell: bash
run: |
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/ksponspeech/ASR/run.sh
- name: Show model files (2024-06-24)
shell: bash
run: |
src=/tmp/model-2024-06-24
ls -lh $src
- name: Show model files (2024-06-16)
shell: bash
run: |
src=/tmp/model-2024-06-16
ls -lh $src
- name: Upload model to huggingface (2024-06-24)
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
src=/tmp/model-2024-06-24
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf hf
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 hf
cd hf
git fetch
git pull
git merge -m "merge remote" --ff origin main
cp -av $src/* ./
ls -lh
git lfs track "bpe.model"
git lfs track "*.onnx"
git add .
git status
git commit -m "update models"
git status
git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 main || true
rm -rf hf
# Copies the files in /tmp/model-2024-06-16 into a fresh clone of the streaming
# Korean zipformer Hugging Face repo, commits and pushes them.
- name: Upload model to huggingface (2024-06-16)
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
src=/tmp/model-2024-06-16
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
# Start from a clean directory so a retried attempt does not reuse a stale clone
rm -rf hf
# Clone without downloading LFS payloads (git-lfs then leaves pointer files)
export GIT_LFS_SKIP_SMUDGE=1
export GIT_CLONE_PROTECTION_ACTIVE=false
git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
cd hf
git fetch
git pull
git merge -m "merge remote" --ff origin main
# Non-recursive copy: sub-directories of $src are not copied by this line
cp -v $src/* ./
ls -lh
git lfs track "bpe.model"
git lfs track "*.onnx"
# Note the direction: test_wavs is copied from the cloned repo INTO $src.
# NOTE(review): presumably so the release tarball later built from $src
# contains the test wavs - confirm this is intended.
cp -av test_wavs $src/
git add .
git status
git commit -m "update models"
git status
# `|| true`: a failed push does not make this command fail
git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
rm -rf hf
- name: Prepare for release (2024-06-16)
shell: bash
run: |
src=/tmp/model-2024-06-16
d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
mv $src ./$d
tar cjvf ${d}.tar.bz2 $d
ls -lh
- name: Prepare for release (2024-06-24)
shell: bash
run: |
src=/tmp/model-2024-06-24
d=sherpa-onnx-zipformer-korean-2024-06-24
mv $src ./$d
tar cjvf ${d}.tar.bz2 $d
ls -lh
- name: Release exported onnx models
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: sherpa-onnx-*.tar.bz2
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models

View File

@ -1,72 +0,0 @@
name: librispeech
on:
push:
branches:
- master
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: librispeech-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
  id: set-matrix
  run: |
    # Generate the build matrix once (passing --min-torch-version "2.6.0"),
    # print it for debugging, then publish it as the `matrix` step output.
    # Variant without the minimum torch version:
    # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
    echo "${MATRIX}"
    # The `::set-output` workflow command is deprecated; step outputs are
    # written to the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the generator prints single-line JSON - confirm.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
librispeech:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
# refer to https://github.com/actions/checkout
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
- name: Test zipformer/train.py with LibriSpeech
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
git config --global --add safe.directory /icefall
.github/scripts/librispeech/ASR/run.sh

View File

@ -1,166 +0,0 @@
name: ljspeech
on:
push:
branches:
- master
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: ljspeech-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
  id: set-matrix
  run: |
    # Generate the build matrix once, print it for debugging, then publish it
    # as the `matrix` output of this step.
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
    echo "${MATRIX}"
    # The `::set-output` workflow command is deprecated; step outputs are
    # written to the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the generator prints single-line JSON - confirm.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
ljspeech:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
ls -lh
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
git config --global --add safe.directory /icefall
pip install "matplotlib<=3.9.4"
pip list
.github/scripts/ljspeech/TTS/run-matcha.sh
.github/scripts/ljspeech/TTS/run.sh
- name: display files
shell: bash
run: |
ls -lh
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav
- name: Release exported onnx models
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
overwrite: true
file: vits-icefall-*.tar.bz2
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: tts-models
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-2
path: ./model-steps-2.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-3
path: ./model-steps-3.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-4
path: ./model-steps-4.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-5
path: ./model-steps-5.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-6
path: ./model-steps-6.onnx
# Copies the exported matcha model directory into a clone of the Hugging Face
# repo and pushes it.
# Restricted to `push` events, like the vits release step in this workflow, so
# that pull-request runs never publish models built from unmerged code.
- name: Upload models to huggingface
  if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
  shell: bash
  env:
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  run: |
    d=matcha-icefall-en_US-ljspeech

    # Clone without downloading LFS payloads
    GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
    cp -av $d/* hf/

    pushd hf
    git lfs track "cmn_dict"
    git lfs track "ru_dict"
    git add .

    git config --global user.name "csukuangfj"
    git config --global user.email "csukuangfj@gmail.com"
    git config --global lfs.allowincompletepush true

    # Best effort: an empty commit or a failed push does not fail the step
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
    popd
# Uploads matcha-icefall-*.tar.bz2 to the `tts-models` release of
# k2-fsa/sherpa-onnx.
# Restricted to `push` events, like the vits release step in this workflow, so
# that pull-request runs never overwrite published release assets.
- name: Release exported onnx models
  if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
  uses: svenstaro/upload-release-action@v2
  with:
    file_glob: true
    overwrite: true
    file: matcha-icefall-*.tar.bz2
    repo_name: k2-fsa/sherpa-onnx
    repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
    tag: tts-models

View File

@ -1,86 +0,0 @@
name: multi-zh-hans
on:
push:
branches:
- master
workflow_dispatch:
concurrency:
group: multi-zh-hans-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: write
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
  id: set-matrix
  run: |
    # Generate the build matrix once, print it for debugging, then publish it
    # as the `matrix` output of this step.
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11")
    echo "${MATRIX}"
    # The `::set-output` workflow command is deprecated; step outputs are
    # written to the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the generator prints single-line JSON - confirm.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
multi-zh-hans:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Free space
shell: bash
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
- name: Test with multi_zh-hans
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
export HF_TOKEN=${{ secrets.HF_TOKEN }}
cd /icefall
git config --global --add safe.directory /icefall
.github/scripts/multi_zh-hans/ASR/run.sh
- name: Show models
shell: bash
run: |
ls -lh *.tar.bz2
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models

View File

@ -1,134 +0,0 @@
name: rknn
on:
push:
branches:
- master
- rknn-zipformer2
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: rknn-${{ github.ref }}
cancel-in-progress: true
jobs:
rknn:
name: RKNN ${{ matrix.recipe }} ${{ matrix.rknn_toolkit2_version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.10"]
k2-version: ["1.24.4.dev20241029"]
kaldifeat-version: ["1.25.5.dev20241029"]
torch-version: ["2.0.0"]
torchaudio-version: ["2.0.1"]
version: ["20241218"]
# recipe: ["librispeech", "wenetspeech", "multi_zh-hans"]
recipe: ["librispeech"]
rknn_toolkit2_version: ["2.2.0", "2.1.0"]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Export RKNN model
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
cat /etc/*release
lsb_release -a
uname -a
python3 --version
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
git config --global --add safe.directory /icefall
python3 -m torch.utils.collect_env
python3 -m k2.version
pip list
export rknn_toolkit2_version=${{ matrix.rknn_toolkit2_version }}
if [[ $rknn_toolkit2_version == "2.1.0" ]]; then
# for the folder pruned_transducer_stateless7_streaming
curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
else
# for the folder zipformer/
curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
fi
# Install rknn
pip install ./*.whl "numpy<=1.26.4"
pip list | grep rknn
echo "---"
pip list
echo "---"
recipe=${{ matrix.recipe }}
.github/scripts/$recipe/ASR/run_rknn.sh > log-$recipe.txt 2>&1 || true
- uses: actions/upload-artifact@v4
with:
name: log-${{ matrix.recipe }}-${{ matrix.rknn_toolkit2_version }}
path: ./log-*.txt
- name: Display results
shell: bash
run: |
ls -lh *rk*.tar.bz2 || true
# Uploads sherpa-onnx-*.tar.bz2 to the `asr-models` release of
# k2-fsa/sherpa-onnx.
# Uses the same event guard as the Hugging Face upload step of this job, so
# pull-request runs do not overwrite published release assets.
- name: Release to GitHub
  if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
  uses: svenstaro/upload-release-action@v2
  with:
    file_glob: true
    overwrite: true
    file: sherpa-onnx-*.tar.bz2
    repo_name: k2-fsa/sherpa-onnx
    repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
    tag: asr-models
- name: Upload model to huggingface
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
with:
max_attempts: 20
timeout_seconds: 200
shell: bash
command: |
git config --global user.email "csukuangfj@gmail.com"
git config --global user.name "Fangjun Kuang"
rm -rf huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/sherpa-onnx-rknn-models huggingface
cd huggingface
git fetch
git pull
git merge -m "merge remote" --ff origin main
dst=streaming-asr
mkdir -p $dst
cp ../*rk*.tar.bz2 $dst/ || true
ls -lh $dst
git add .
git status
git commit -m "update models"
git status
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-rknn-models main || true
rm -rf huggingface

View File

@ -1,144 +0,0 @@
name: Run docker image
on:
workflow_dispatch:
concurrency:
group: run_docker_image-${{ github.ref }}
cancel-in-progress: true
jobs:
run-docker-image:
name: ${{ matrix.image }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
image: ["torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
steps:
# refer to https://github.com/actions/checkout
# Full-history checkout (fetch-depth: 0). Uses checkout v4, matching the other
# workflows in this repository, instead of the outdated v2.
- uses: actions/checkout@v4
  with:
    fetch-depth: 0
- name: Free space
shell: bash
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
- name: Free more space
shell: bash
run: |
# https://github.com/orgs/community/discussions/25678
cd /opt
find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
sudo rm -rf /usr/share/dotnet
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
# this might remove tools that are actually needed,
# if set to "true" but frees about 6 GB
tool-cache: false
# all of these default to true, but feel free to set to
# "false" if necessary for your workflow
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: false
swap-storage: true
- name: Check space
shell: bash
run: |
df -h
# Smoke-tests a published k2fsa/icefall image: prints CUDA / Python / package
# diagnostics, then runs the yesno recipe (prepare, train, decode) inside it.
- name: Run the build process with Docker
  uses: addnab/docker-run-action@v3
  with:
    image: k2fsa/icefall:${{ matrix.image }}
    shell: bash
    run: |
      uname -a
      cat /etc/*release

      # The -name patterns are quoted so that find receives the glob itself;
      # unquoted, bash would expand it first if a matching file existed in the
      # current directory.
      find / -name 'libcuda*' 2>/dev/null

      ls -lh /usr/local/
      ls -lh /usr/local/cuda*

      nvcc --version

      ls -lh /usr/local/cuda-*/compat/*

      # The compat directory of every CUDA version used by the images is added;
      # directories that do not exist in a given image are harmless entries.

      # For torch1.9.0-cuda10.2
      export LD_LIBRARY_PATH=/usr/local/cuda-10.2/compat:$LD_LIBRARY_PATH

      # For torch1.12.1-cuda11.3
      export LD_LIBRARY_PATH=/usr/local/cuda-11.3/compat:$LD_LIBRARY_PATH

      # For torch2.0.0-cuda11.7
      export LD_LIBRARY_PATH=/usr/local/cuda-11.7/compat:$LD_LIBRARY_PATH

      # For torch2.1.0-cuda11.8
      export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH

      # For torch2.1.0-cuda12.1
      export LD_LIBRARY_PATH=/usr/local/cuda-12.1/compat:$LD_LIBRARY_PATH

      which nvcc
      cuda_dir=$(dirname "$(which nvcc)")
      echo "cuda_dir: $cuda_dir"

      find "$cuda_dir" -name 'libcuda.so*'
      echo "--------------------"

      find / -name 'libcuda.so*' 2>/dev/null

      # for torch1.13.0-cuda11.6
      # Only the stub library is present there, so expose it as libcuda.so.1.
      if [ -e /opt/conda/lib/stubs/libcuda.so ]; then
        cd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && cd -
        export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH
      fi

      find / -name 'libcuda.so*' 2>/dev/null
      echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"

      python3 --version
      which python3

      python3 -m pip list

      echo "----------torch----------"
      python3 -m torch.utils.collect_env

      echo "----------k2----------"
      python3 -c "import k2; print(k2.__file__)"
      python3 -c "import k2; print(k2.__dev_version__)"
      python3 -m k2.version

      echo "----------lhotse----------"
      python3 -c "import lhotse; print(lhotse.__file__)"
      python3 -c "import lhotse; print(lhotse.__version__)"

      echo "----------kaldifeat----------"
      python3 -c "import kaldifeat; print(kaldifeat.__file__)"
      python3 -c "import kaldifeat; print(kaldifeat.__version__)"

      echo "Test yesno recipe"

      cd egs/yesno/ASR

      ./prepare.sh

      ./tdnn/train.py

      ./tdnn/decode.py

View File

@ -1,128 +0,0 @@
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-gigaspeech-2022-05-13
# stateless transducer + k2 pruned rnnt-loss + reworked conformer
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: run_gigaspeech_2022_05_13-${{ github.ref }}
cancel-in-progress: true
jobs:
run_gigaspeech_2022_05_13:
if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
# Full-history checkout (fetch-depth: 0). Uses checkout v4, matching the other
# workflows in this repository, instead of the outdated v2.
- uses: actions/checkout@v4
  with:
    fetch-depth: 0
# Installs the interpreter selected by the job matrix and enables pip caching
# keyed on the requirements-ci.txt files.
# Uses setup-python v5 (the version the ksponspeech workflow already uses)
# instead of the outdated v2; all inputs are unchanged.
- name: Setup Python ${{ matrix.python-version }}
  uses: actions/setup-python@v5
  with:
    python-version: ${{ matrix.python-version }}
    cache: 'pip'
    cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
# Restores/saves the kaldifeat build under ~/tmp/kaldifeat; the following
# install step is skipped when `steps.my-cache.outputs.cache-hit` is 'true'.
- name: Cache kaldifeat
  id: my-cache
  # actions/cache v1-v2 have been retired by GitHub; v4 accepts the same
  # `path` and `key` inputs and exposes the same `cache-hit` output.
  uses: actions/cache@v4
  with:
    path: |
      ~/tmp/kaldifeat
    key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Download GigaSpeech dev/test dataset
shell: bash
run: |
sudo apt-get install -y -q git-lfs
.github/scripts/download-gigaspeech-dev-test-dataset.sh
- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
ln -s ~/tmp/giga-dev-dataset-fbank/data egs/gigaspeech/ASR/
ls -lh egs/gigaspeech/ASR/data/fbank
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
# Print the greedy-search decoding summary for the GigaSpeech
# pruned_transducer_stateless2 experiment directory. Runs only for scheduled
# builds, manual dispatches, or pull requests labeled 'run-decode'.
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
  if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
  shell: bash
  run: |
    # Install tree BEFORE its first use; the step previously called tree
    # first and installed it afterwards.
    sudo apt-get -qq install tree

    cd egs/gigaspeech/ASR/
    tree ./pruned_transducer_stateless2/exp

    cd pruned_transducer_stateless2
    echo "results for pruned_transducer_stateless2"
    echo "===greedy search==="
    find exp/greedy_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
    find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
uses: actions/upload-artifact@v4
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/

View File

@ -1,136 +0,0 @@
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-gigaspeech-zipformer-2023-10-17
# zipformer
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
cancel-in-progress: true
jobs:
run_gigaspeech_2023_10_17_zipformer:
if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models
# Print the greedy-search decoding summary for the GigaSpeech zipformer
# experiment directory. Runs only for scheduled builds, manual dispatches, or
# pull requests labeled 'run-decode'.
- name: Display decoding results for gigaspeech zipformer
  if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
  shell: bash
  run: |
    cd egs/gigaspeech/ASR/
    tree ./zipformer/exp

    cd zipformer

    echo "results for zipformer"
    echo "===greedy search==="
    # This is a GigaSpeech recipe, so grep for the dev/test subsets (as the
    # sibling gigaspeech pruned_transducer_stateless2 workflow does) rather
    # than the LibriSpeech names test-clean/test-other.
    # NOTE(review): confirm the exact "best for <subset>" strings against the
    # logs written by zipformer/decode.py.
    find exp/greedy_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
    find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2

    # echo "===fast_beam_search==="
    # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
    # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
    #
    # echo "===modified beam search==="
    # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
    # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
- name: Upload decoding results for gigaspeech zipformer
uses: actions/upload-artifact@v4
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
path: egs/gigaspeech/ASR/zipformer/exp/

View File

@ -1,165 +0,0 @@
name: run-librispeech-lstm-transducer2-2022-09-03
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
cancel-in-progress: true
jobs:
run_librispeech_lstm_transducer_stateless2_2022_09_03:
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Cache LibriSpeech test-clean and test-other datasets
id: libri-test-clean-and-test-other-data
uses: actions/cache@v2
with:
path: |
~/tmp/download
key: cache-libri-test-clean-and-test-other
- name: Download LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
- name: Prepare manifests for LibriSpeech test-clean and test-other
shell: bash
run: |
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
- name: Cache LibriSpeech test-clean and test-other fbank features
id: libri-test-clean-and-test-other-fbank
uses: actions/cache@v2
with:
path: |
~/tmp/fbank-libri
key: cache-libri-fbank-test-clean-and-test-other-v2
- name: Compute fbank for LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
mkdir -p egs/librispeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
- name: Display decoding results for lstm_transducer_stateless2
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
shell: bash
run: |
cd egs/librispeech/ASR
tree lstm_transducer_stateless2/exp
cd lstm_transducer_stateless2/exp
echo "===greedy search==="
find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===fast_beam_search==="
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
# echo "===modified beam search==="
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Display decoding results for lstm_transducer_stateless2
if: github.event.label.name == 'shallow-fusion'
shell: bash
run: |
cd egs/librispeech/ASR
tree lstm_transducer_stateless2/exp
cd lstm_transducer_stateless2/exp
echo "===modified_beam_search_lm_shallow_fusion==="
echo "===Using RNNLM==="
find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Display decoding results for lstm_transducer_stateless2
if: github.event.label.name == 'LODR'
shell: bash
run: |
cd egs/librispeech/ASR
tree lstm_transducer_stateless2/exp
cd lstm_transducer_stateless2/exp
echo "===modified_beam_search_rnnlm_LODR==="
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Upload decoding results for lstm_transducer_stateless2
uses: actions/upload-artifact@v4
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/

View File

@ -1,86 +0,0 @@
# Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-multi-corpora-zipformer
on:
push:
branches:
- master
pull_request:
types: [labeled]
workflow_dispatch:
concurrency:
group: run_multi-corpora_zipformer-${{ github.ref }}
cancel-in-progress: true
jobs:
# Runs inference with a pre-trained multi-corpora zipformer model.
run_multi-corpora_zipformer:
  # The workflow declares a workflow_dispatch trigger, so the job condition
  # must accept it as well; otherwise manually dispatched runs are skipped.
  if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'multi-zh_hans' || github.event.label.name == 'zipformer' || github.event.label.name == 'multi-corpora' || github.event_name == 'workflow_dispatch'

  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: [3.8]
    fail-fast: false

  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements-ci.txt'

    - name: Install Python dependencies
      run: |
        grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
        pip uninstall -y protobuf
        pip install --no-binary protobuf protobuf==3.20.*

    - name: Cache kaldifeat
      id: my-cache
      uses: actions/cache@v2
      with:
        path: |
          ~/tmp/kaldifeat
        key: cache-tmp-${{ matrix.python-version }}-2023-05-22

    # Build kaldifeat only when the cache above did not restore it.
    - name: Install kaldifeat
      if: steps.my-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        .github/scripts/install-kaldifeat.sh

    - name: Inference with pre-trained model
      shell: bash
      env:
        # Exposed to the script as environment variables.
        GITHUB_EVENT_NAME: ${{ github.event_name }}
        GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
      run: |
        sudo apt-get -qq install git-lfs tree
        export PYTHONPATH=$PWD:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

        .github/scripts/run-multi-corpora-zipformer.sh

View File

@ -1,73 +0,0 @@
name: run-ptb-rnn-lm-training
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: run_ptb_rnn_lm_training-${{ github.ref }}
cancel-in-progress: true
jobs:
# Prepares the PTB data, trains an RNN LM on it, and uploads the experiment
# directory as an artifact.
run_ptb_rnn_lm_training:
  # The workflow declares a workflow_dispatch trigger, so the job condition
  # must accept it as well; otherwise manually dispatched runs are skipped.
  if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'

  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: ["3.8"]
    fail-fast: false

  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements-ci.txt'

    - name: Install Python dependencies
      run: |
        # kaldifst is filtered out of the CI requirements for this job.
        grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
        pip uninstall -y protobuf
        pip install --no-binary protobuf protobuf==3.20.*

    - name: Prepare data
      shell: bash
      run: |
        export PYTHONPATH=$PWD:$PYTHONPATH
        cd egs/ptb/LM
        ./prepare.sh

    - name: Run training
      shell: bash
      run: |
        export PYTHONPATH=$PWD:$PYTHONPATH
        cd egs/ptb/LM
        ./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2

    - name: Upload pretrained models
      uses: actions/upload-artifact@v4
      # Kept in sync with the job-level condition so manual runs also upload.
      if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
      with:
        name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
        path: egs/ptb/LM/my-rnnlm-exp/

View File

@ -1,86 +0,0 @@
# Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-swbd-conformer_ctc
on:
push:
branches:
- master
pull_request:
types: [labeled]
workflow_dispatch:
concurrency:
group: run-swbd-conformer_ctc-${{ github.ref }}
cancel-in-progress: true
jobs:
# Runs inference with a pre-trained Switchboard conformer CTC model.
run-swbd-conformer_ctc:
  # The workflow declares a workflow_dispatch trigger, so the job condition
  # must accept it as well; otherwise manually dispatched runs are skipped.
  if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'swbd' || github.event_name == 'workflow_dispatch'

  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: [3.8]
    fail-fast: false

  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements-ci.txt'

    - name: Install Python dependencies
      run: |
        grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
        pip uninstall -y protobuf
        pip install --no-binary protobuf protobuf==3.20.*

    - name: Cache kaldifeat
      id: my-cache
      uses: actions/cache@v2
      with:
        path: |
          ~/tmp/kaldifeat
        key: cache-tmp-${{ matrix.python-version }}-2023-05-22

    # Build kaldifeat only when the cache above did not restore it.
    - name: Install kaldifeat
      if: steps.my-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        .github/scripts/install-kaldifeat.sh

    - name: Inference with pre-trained model
      shell: bash
      env:
        # Exposed to the script as environment variables.
        GITHUB_EVENT_NAME: ${{ github.event_name }}
        GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
      run: |
        sudo apt-get -qq install git-lfs tree
        export PYTHONPATH=$PWD:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

        .github/scripts/run-swbd-conformer-ctc-2023-08-26.sh

View File

@ -1,86 +0,0 @@
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-wenetspeech-pruned-transducer-stateless2
on:
push:
branches:
- master
pull_request:
types: [labeled]
workflow_dispatch:
concurrency:
group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }}
cancel-in-progress: true
jobs:
# Runs inference with a pre-trained WenetSpeech pruned_transducer_stateless2
# model.
run_wenetspeech_pruned_transducer_stateless2:
  # The workflow declares a workflow_dispatch trigger, so the job condition
  # must accept it as well; otherwise manually dispatched runs are skipped.
  if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'wenetspeech' || github.event_name == 'workflow_dispatch'

  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: [3.8]
    fail-fast: false

  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements-ci.txt'

    - name: Install Python dependencies
      run: |
        grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
        pip uninstall -y protobuf
        pip install --no-binary protobuf protobuf==3.20.*

    - name: Cache kaldifeat
      id: my-cache
      uses: actions/cache@v2
      with:
        path: |
          ~/tmp/kaldifeat
        key: cache-tmp-${{ matrix.python-version }}-2023-05-22

    # Build kaldifeat only when the cache above did not restore it.
    - name: Install kaldifeat
      if: steps.my-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        .github/scripts/install-kaldifeat.sh

    - name: Inference with pre-trained model
      shell: bash
      env:
        # Exposed to the script as environment variables.
        GITHUB_EVENT_NAME: ${{ github.event_name }}
        GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
      run: |
        sudo apt-get -qq install git-lfs tree
        export PYTHONPATH=$PWD:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

        .github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh

78
.github/workflows/run-yesno-recipe.yml vendored Normal file
View File

@ -0,0 +1,78 @@
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-yesno-recipe
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
run-yesno-recipe:
runs-on: ${{ matrix.os }}
strategy:
matrix:
# os: [ubuntu-18.04, macos-10.15]
# TODO: enable macOS for CPU testing
os: [ubuntu-18.04]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
# Install the system audio libraries (libsndfile, ffmpeg, sox) on Ubuntu
# runners. The step name previously misspelled libsndfile as "libnsdfile".
- name: Install libsndfile and libsox
  if: startsWith(matrix.os, 'ubuntu')
  run: |
    sudo apt update
    sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
    sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
- name: Install Python dependencies
run: |
python3 -m pip install --upgrade pip black flake8
python3 -m pip install -U pip
python3 -m pip install k2==1.4.dev20210822+cpu.torch1.7.1 -f https://k2-fsa.org/nightly/
python3 -m pip install torchaudio==0.7.2
python3 -m pip install git+https://github.com/lhotse-speech/lhotse
# We are in ./icefall and there is a file: requirements.txt in it
python3 -m pip install -r requirements.txt
- name: Run yesno recipe
shell: bash
working-directory: ${{github.workspace}}
run: |
export PYTHONPATH=$PWD:$PYTHONPATH
echo $PYTHONPATH
cd egs/yesno/ASR
./prepare.sh
python3 ./tdnn/train.py
python3 ./tdnn/decode.py
# TODO: Check that the WER is less than some value

View File

@ -24,19 +24,13 @@ on:
branches:
- master
workflow_dispatch:
concurrency:
group: style_check-${{ github.ref }}
cancel-in-progress: true
jobs:
style_check:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.10"]
os: [ubuntu-18.04, macos-10.15]
python-version: [3.7, 3.9]
fail-fast: false
steps:
@ -51,27 +45,18 @@ jobs:
- name: Install Python dependencies
run: |
python3 -m pip install --upgrade pip black==22.3.0 flake8==5.0.4 click==8.1.0 isort==5.10.1
# Click issue fixed in https://github.com/psf/black/pull/2966
python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2
- name: Run flake8
shell: bash
working-directory: ${{github.workspace}}
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 \
--statistics --extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503
flake8 . --count --show-source --statistics
flake8 .
- name: Run black
shell: bash
working-directory: ${{github.workspace}}
run: |
black --check --diff .
- name: Run isort
shell: bash
working-directory: ${{github.workspace}}
run: |
isort --check --diff .

View File

@ -1,77 +0,0 @@
name: test-ncnn-export
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: test_ncnn_export-${{ github.ref }}
cancel-in-progress: true
jobs:
# Runs the ncnn export test script.
test_ncnn_export:
  # The workflow declares a workflow_dispatch trigger, so the job condition
  # must accept it as well; otherwise manually dispatched runs are skipped.
  if: github.event.label.name == 'ready' || github.event.label.name == 'ncnn' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'

  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: [3.8]
    fail-fast: false

  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements-ci.txt'

    - name: Install Python dependencies
      run: |
        grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
        pip uninstall -y protobuf
        pip install --no-binary protobuf protobuf==3.20.*

    - name: Cache kaldifeat
      id: my-cache
      uses: actions/cache@v2
      with:
        path: |
          ~/tmp/kaldifeat
        key: cache-tmp-${{ matrix.python-version }}-2023-05-22

    # Build kaldifeat only when the cache above did not restore it.
    - name: Install kaldifeat
      if: steps.my-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        .github/scripts/install-kaldifeat.sh

    - name: Test ncnn export
      shell: bash
      env:
        # Exposed to the script as environment variables.
        GITHUB_EVENT_NAME: ${{ github.event_name }}
        GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
      run: |
        export PYTHONPATH=$PWD:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

        .github/scripts/test-ncnn-export.sh

View File

@ -1,77 +0,0 @@
name: test-onnx-export
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: test_onnx_export-${{ github.ref }}
cancel-in-progress: true
jobs:
# Runs the ONNX export test script.
test_onnx_export:
  # The workflow declares a workflow_dispatch trigger, so the job condition
  # must accept it as well; otherwise manually dispatched runs are skipped.
  if: github.event.label.name == 'ready' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'

  runs-on: ${{ matrix.os }}
  strategy:
    matrix:
      os: [ubuntu-latest]
      python-version: [3.8]
    fail-fast: false

  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 0

    - name: Setup Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements-ci.txt'

    - name: Install Python dependencies
      run: |
        grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
        pip uninstall -y protobuf
        pip install --no-binary protobuf protobuf==3.20.*

    - name: Cache kaldifeat
      id: my-cache
      uses: actions/cache@v2
      with:
        path: |
          ~/tmp/kaldifeat
        key: cache-tmp-${{ matrix.python-version }}-2023-05-22

    # Build kaldifeat only when the cache above did not restore it.
    - name: Install kaldifeat
      if: steps.my-cache.outputs.cache-hit != 'true'
      shell: bash
      run: |
        .github/scripts/install-kaldifeat.sh

    - name: Test ONNX export
      shell: bash
      env:
        # Exposed to the script as environment variables.
        GITHUB_EVENT_NAME: ${{ github.event_name }}
        GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
      run: |
        export PYTHONPATH=$PWD:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
        export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

        .github/scripts/test-onnx-export.sh

View File

@ -1,111 +1,71 @@
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: test
on:
push:
branches:
- master
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: test-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# Run generate_build_matrix.py and publish its stdout as the step output
# "matrix" (read by the job's `outputs.matrix`).
- name: Generating build matrix
  id: set-matrix
  run: |
    # outputting for debugging purposes
    python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
    # The "::set-output" workflow command is deprecated; append name=value to
    # the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the script prints the matrix on a single line.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
test:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
os: [ubuntu-18.04, macos-10.15]
python-version: [3.6, 3.7, 3.8, 3.9]
torch: ["1.8.1"]
k2-version: ["1.4.dev20210822"]
fail-fast: false
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Free space
shell: bash
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Install Python dependencies
run: |
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
python3 -m pip install --upgrade pip pytest
pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/
# icefall requirements
pip install -r requirements.txt
- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
git config --global --add safe.directory /icefall
if: startsWith(matrix.os, 'ubuntu')
run: |
ls -lh
export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH
echo $PYTHONPATH
pytest ./test
pytest -v -s ./test
# run tests for conformer ctc
cd egs/librispeech/ASR/conformer_ctc
pytest -v -s
cd ../pruned_transducer_stateless
pytest -v -s
cd ../pruned_transducer_stateless2
pytest -v -s
cd ../pruned_transducer_stateless3
pytest -v -s
cd ../pruned_transducer_stateless4
pytest -v -s
echo $PYTHONPATH
cd ../pruned_transducer_stateless7
pytest -v -s
cd ../transducer_stateless
pytest -v -s
# cd ../transducer
# pytest -v -s
cd ../transducer_stateless2
pytest -v -s
cd ../transducer_lstm
pytest -v -s
cd ../zipformer
pytest -v -s
- uses: actions/upload-artifact@v4
with:
path: egs/librispeech/ASR/zipformer/swoosh.pdf
name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }}
- name: Run tests
if: startsWith(matrix.os, 'macos')
run: |
ls -lh
export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH
lib_path=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")
echo "lib_path: $lib_path"
export DYLD_LIBRARY_PATH=$lib_path:$DYLD_LIBRARY_PATH
pytest ./test

View File

@ -1,67 +0,0 @@
name: yesno
on:
push:
branches:
- master
pull_request:
branches:
- master
workflow_dispatch:
concurrency:
group: yesno-${{ github.ref }}
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
# Run generate_build_matrix.py and publish its stdout as the step output
# "matrix" (read by the job's `outputs.matrix`).
- name: Generating build matrix
  id: set-matrix
  run: |
    # outputting for debugging purposes
    python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
    MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
    # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.5.0")
    # The "::set-output" workflow command is deprecated; append name=value to
    # the file named by $GITHUB_OUTPUT instead.
    # NOTE(review): assumes the script prints the matrix on a single line.
    echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
yesno:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Run the yesno recipe
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
run: |
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
git config --global --add safe.directory /icefall
python3 -m torch.utils.collect_env
python3 -m k2.version
pip list
.github/scripts/yesno/ASR/run.sh

33
.gitignore vendored
View File

@ -1,38 +1,7 @@
# Ignore rules for the icefall repository.
# Comments must stay on their own lines: gitignore has no inline-comment syntax.

# Packaging metadata, Python bytecode caches and per-recipe working
# directories (data, experiment output, downloads, logs).
icefall.egg-info/
data
__pycache__
path.sh
exp
exp*/
# presumably saved PyTorch checkpoints/tensors - confirm
*.pt
download
dask-worker-space
log
# Backup copies
*.bak
*-bak
*bak.py
# Ignore Mac system files
# NOTE(review): the file macOS creates is `.DS_Store` (capital S), which is
# listed separately further down; this lower-case spelling only matches on
# case-insensitive setups.
.DS_store
# Ignore node_modules folder
node_modules
# ignore .nfs
.nfs*
# Ignore all text files
*.txt
# Ignore files related to API keys
.env
# Ignore SASS config files
.sass-cache
# presumably exported model / language-model artifacts - confirm
*.param
*.bin
.DS_Store
*.fst
*.arpa
# NOTE(review): overlaps with the plain `download` pattern above.
download/

View File

@ -1,38 +1,24 @@
repos:
- repo: https://github.com/psf/black
rev: 22.3.0
rev: 21.6b0
hooks:
- id: black
args: ["--line-length=88"]
additional_dependencies: ['click==8.1.0']
exclude: icefall\/__init__\.py
args: [--line-length=80]
- repo: https://github.com/PyCQA/flake8
rev: 5.0.4
rev: 3.9.2
hooks:
- id: flake8
args: ["--max-line-length=88", "--extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503"]
# What are we ignoring here?
# E203: whitespace before ':'
# E266: too many leading '#' for block comment
# E501: line too long
# F401: module imported but unused
# E402: module level import not at top of file
# F403: 'from module import *' used; unable to detect undefined names
# F841: local variable is assigned to but never used
# W503: line break before binary operator
# In addition, the default ignore list is:
# E121,E123,E126,E226,E24,E704,W503,W504
args: [--max-line-length=80]
- repo: https://github.com/pycqa/isort
rev: 5.12.0
rev: 5.9.2
hooks:
- id: isort
args: ["--profile=black"]
args: [--profile=black, --line-length=80]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.2.0
rev: v4.0.1
hooks:
- id: check-executables-have-shebangs
- id: end-of-file-fixer

View File

@ -1,4 +1,13 @@
Legal Notices
NOTE (this is not from the Apache License): The copyright model is that
authors (or their employers, if noted in individual files) own their
individual contributions. The authors' contributions can be discerned
from the git history.
-------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

380
README.md
View File

@ -2,85 +2,22 @@
<img src="https://raw.githubusercontent.com/k2-fsa/icefall/master/docs/source/_static/logo.png" width=168>
</div>
# Introduction
## Installation
The icefall project contains speech-related recipes for various datasets
using [k2-fsa](https://github.com/k2-fsa/k2) and [lhotse](https://github.com/lhotse-speech/lhotse).
You can use [sherpa](https://github.com/k2-fsa/sherpa), [sherpa-ncnn](https://github.com/k2-fsa/sherpa-ncnn) or [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) for deployment with models
in icefall. These frameworks also support models that are not included in icefall; please refer to their respective documentation for more details.
You can try pre-trained models from within your browser without the need
to download or install anything by visiting this [huggingface space](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition).
Please refer to [document](https://k2-fsa.github.io/icefall/huggingface/spaces.html) for more details.
# Installation
Please refer to [document](https://k2-fsa.github.io/icefall/installation/index.html)
Please refer to <https://icefall.readthedocs.io/en/latest/installation/index.html>
for installation.
# Recipes
## Recipes
Please refer to [document](https://k2-fsa.github.io/icefall/recipes/index.html)
for more details.
Please refer to <https://icefall.readthedocs.io/en/latest/recipes/index.html>
for more information.
## ASR: Automatic Speech Recognition
We provide two recipes at present:
### Supported Datasets
- [yesno][yesno]
- [Aidatatang_200zh][aidatatang_200zh]
- [Aishell][aishell]
- [Aishell2][aishell2]
- [Aishell4][aishell4]
- [Alimeeting][alimeeting]
- [AMI][ami]
- [CommonVoice][commonvoice]
- [Corpus of Spontaneous Japanese][csj]
- [GigaSpeech][gigaspeech]
- [LibriCSS][libricss]
- [LibriSpeech][librispeech]
- [Libriheavy][libriheavy]
- [Multi-Dialect Broadcast News Arabic Speech Recognition][mgb2]
- [SPGISpeech][spgispeech]
- [Switchboard][swbd]
- [TIMIT][timit]
- [TED-LIUM3][tedlium3]
- [TAL_CSASR][tal_csasr]
- [Voxpopuli][voxpopuli]
- [XBMU-AMDO31][xbmu-amdo31]
- [WenetSpeech][wenetspeech]
More datasets will be added in the future.
### Supported Models
The [LibriSpeech][librispeech] recipe supports the most comprehensive set of models; you are welcome to try them out.
#### CTC
- TDNN LSTM CTC
- Conformer CTC
- Zipformer CTC
#### MMI
- Conformer MMI
- Zipformer MMI
#### Transducer
- Conformer-based Encoder
- LSTM-based Encoder
- Zipformer-based Encoder
- LSTM-based Predictor
- [Stateless Predictor](https://research.google/pubs/rnn-transducer-with-stateless-prediction-network/)
#### Whisper
- [OpenAI Whisper](https://arxiv.org/abs/2212.04356) (We support fine-tuning on Aishell-1.)
If you are willing to contribute to icefall, please refer to [contributing](https://k2-fsa.github.io/icefall/contributing/index.html) for more details.
We would like to highlight the performance of some of the recipes here.
### [yesno][yesno]
### yesno
This is the simplest ASR recipe in `icefall` and can be run on CPU.
Training takes less than 30 seconds and gives you the following WER:
@ -88,302 +25,37 @@ Training takes less than 30 seconds and gives you the following WER:
```
[test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
```
We provide a Colab notebook for this recipe: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing)
We do provide a Colab notebook for this recipe.
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing)
### [LibriSpeech][librispeech]
Please see [RESULTS.md](https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md)
for the **latest** results.
### LibriSpeech
#### [Conformer CTC](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conformer_ctc)
We provide two models for this recipe: [conformer CTC model][LibriSpeech_conformer_ctc]
and [TDNN LSTM CTC model][LibriSpeech_tdnn_lstm_ctc].
| | test-clean | test-other |
|-----|------------|------------|
| WER | 2.42 | 5.73 |
#### Conformer CTC Model
The best WER we currently have is:
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing)
||test-clean|test-other|
|--|--|--|
|WER| 2.57% | 5.94% |
#### [TDNN LSTM CTC](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/tdnn_lstm_ctc)
We provide a Colab notebook to run a pre-trained conformer CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing)
| | test-clean | test-other |
|-----|------------|------------|
| WER | 6.59 | 17.69 |
#### TDNN LSTM CTC Model
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1-iSfQMp2So-We_Uu49N4AAcMInB72u9z?usp=sharing)
The WER for this model is:
||test-clean|test-other|
|--|--|--|
|WER| 6.59% | 17.69% |
#### [Transducer (Conformer Encoder + LSTM Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/transducer)
| | test-clean | test-other |
|---------------|------------|------------|
| greedy_search | 3.07 | 7.51 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1_u6yK9jDkPwG_NLrZMN2XK7Aeq4suMO2?usp=sharing)
#### [Transducer (Conformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/transducer)
| | test-clean | test-other |
|---------------------------------------|------------|------------|
| modified_beam_search (`beam_size=4`) | 2.56 | 6.27 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CO1bXJ-2khDckZIW8zjOPHGSKLHpTDlp?usp=sharing)
#### [Transducer (Zipformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/zipformer)
WER (modified_beam_search `beam_size=4` unless further stated)
1. LibriSpeech-960hr
| Encoder | Params | test-clean | test-other | epochs | devices |
|-----------------|--------|------------|------------|---------|------------|
| Zipformer | 65.5M | 2.21 | 4.79 | 50 | 4 32G-V100 |
| Zipformer-small | 23.2M | 2.42 | 5.73 | 50 | 2 32G-V100 |
| Zipformer-large | 148.4M | 2.06 | 4.63 | 50 | 4 32G-V100 |
| Zipformer-large | 148.4M | 2.00 | 4.38 | 174 | 8 80G-A100 |
2. LibriSpeech-960hr + GigaSpeech
| Encoder | Params | test-clean | test-other |
|-----------------|--------|------------|------------|
| Zipformer | 65.5M | 1.78 | 4.08 |
3. LibriSpeech-960hr + GigaSpeech + CommonVoice
| Encoder | Params | test-clean | test-other |
|-----------------|--------|------------|------------|
| Zipformer | 65.5M | 1.90 | 3.98 |
### [GigaSpeech][gigaspeech]
#### [Conformer CTC](https://github.com/k2-fsa/icefall/tree/master/egs/gigaspeech/ASR/conformer_ctc)
| | Dev | Test |
|-----|-------|-------|
| WER | 10.47 | 10.58 |
#### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/gigaspeech/ASR/pruned_transducer_stateless2)
Conformer Encoder + Stateless Predictor + k2 Pruned RNN-T Loss
| | Dev | Test |
|----------------------|-------|-------|
| greedy_search | 10.51 | 10.73 |
| fast_beam_search | 10.50 | 10.69 |
| modified_beam_search | 10.40 | 10.51 |
#### [Transducer (Zipformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/gigaspeech/ASR/zipformer)
| | Dev | Test |
|----------------------|-------|-------|
| greedy_search | 10.31 | 10.50 |
| fast_beam_search | 10.26 | 10.48 |
| modified_beam_search | 10.25 | 10.38 |
### [Aishell][aishell]
#### [TDNN LSTM CTC](https://github.com/k2-fsa/icefall/tree/master/egs/aishell/ASR/tdnn_lstm_ctc)
| | test |
|-----|-------|
| CER | 10.16 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1jbyzYq3ytm6j2nlEt-diQm-6QVWyDDEa?usp=sharing)
#### [Transducer (Conformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/aishell/ASR/transducer_stateless)
| | test |
|-----|------|
| CER | 4.38 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14XaT2MhnBkK-3_RqqWq3K90Xlbin-GZC?usp=sharing)
#### [Transducer (Zipformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/aishell/ASR/zipformer)
CER (modified_beam_search `beam_size=4`)
| Encoder | Params | dev | test | epochs |
|-----------------|--------|-----|------|---------|
| Zipformer | 73.4M | 4.13| 4.40 | 55 |
| Zipformer-small | 30.2M | 4.40| 4.67 | 55 |
| Zipformer-large | 157.3M | 4.03| 4.28 | 56 |
### [Aishell4][aishell4]
#### [Transducer (pruned_transducer_stateless5)](https://github.com/k2-fsa/icefall/tree/master/egs/aishell4/ASR/pruned_transducer_stateless5)
1 Trained with all subsets:
| | test |
|-----|------------|
| CER | 29.08 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
### [TIMIT][timit]
#### [TDNN LSTM CTC](https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_lstm_ctc)
| |TEST|
|---|----|
|PER| 19.71% |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Hs9DA4V96uapw_30uNp32OMJgkuR5VVd?usp=sharing)
#### [TDNN LiGRU CTC](https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_ligru_ctc)
| |TEST|
|---|----|
|PER| 17.66% |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
### [TED-LIUM3][tedlium3]
#### [Transducer (Conformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/tedlium3/ASR/transducer_stateless)
| | dev | test |
|--------------------------------------|-------|--------|
| modified_beam_search (`beam_size=4`) | 6.91 | 6.33 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MmY5bBxwvKLNT4A2DJnwiqRXhdchUqPN?usp=sharing)
#### [Transducer (pruned_transducer_stateless)](https://github.com/k2-fsa/icefall/tree/master/egs/tedlium3/ASR/pruned_transducer_stateless)
| | dev | test |
|--------------------------------------|-------|--------|
| modified_beam_search (`beam_size=4`) | 6.77 | 6.14 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1je_1zGrOkGVVd4WLzgkXRHxl-I27yWtz?usp=sharing)
### [Aidatatang_200zh][aidatatang_200zh]
#### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2)
| | Dev | Test |
|----------------------|-------|-------|
| greedy_search | 5.53 | 6.59 |
| fast_beam_search | 5.30 | 6.34 |
| modified_beam_search | 5.27 | 6.33 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1wNSnSj3T5oOctbh5IGCa393gKOoQw2GH?usp=sharing)
### [WenetSpeech][wenetspeech]
#### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/wenetspeech/ASR/pruned_transducer_stateless2)
| | Dev | Test-Net | Test-Meeting |
|----------------------|-------|----------|--------------|
| greedy_search | 7.80 | 8.75 | 13.49 |
| fast_beam_search | 7.94 | 8.74 | 13.80 |
| modified_beam_search | 7.76 | 8.71 | 13.41 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1EV4e1CHa1GZgEF-bZgizqI9RyFFehIiN?usp=sharing)
#### [Transducer **Streaming** (pruned_transducer_stateless5) ](https://github.com/k2-fsa/icefall/tree/master/egs/wenetspeech/ASR/pruned_transducer_stateless5)
| | Dev | Test-Net | Test-Meeting |
|----------------------|-------|----------|--------------|
| greedy_search | 8.78 | 10.12 | 16.16 |
| fast_beam_search| 9.01 | 10.47 | 16.28 |
| modified_beam_search | 8.53| 9.95 | 15.81 |
### [Alimeeting][alimeeting]
#### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/alimeeting/ASR/pruned_transducer_stateless2)
| | Eval | Test-Net |
|----------------------|--------|----------|
| greedy_search | 31.77 | 34.66 |
| fast_beam_search | 31.39 | 33.02 |
| modified_beam_search | 30.38 | 34.25 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tKr3f0mL17uO_ljdHGKtR7HOmthYHwJG?usp=sharing)
### [TAL_CSASR][tal_csasr]
#### [Transducer (pruned_transducer_stateless5)](https://github.com/k2-fsa/icefall/tree/master/egs/tal_csasr/ASR/pruned_transducer_stateless5)
The best results for Chinese CER(%) and English WER(%) respectively (zh: Chinese, en: English):
|decoding-method | dev | dev_zh | dev_en | test | test_zh | test_en |
|--|--|--|--|--|--|--|
|greedy_search| 7.30 | 6.48 | 19.19 |7.39| 6.66 | 19.13|
|fast_beam_search| 7.18 | 6.39| 18.90 | 7.27| 6.55 | 18.77|
|modified_beam_search| 7.15 | 6.35 | 18.95 | 7.22| 6.50 | 18.70 |
We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DmIx-NloI1CMU5GdZrlse7TRu4y3Dpf8?usp=sharing)
## TTS: Text-to-Speech
### Supported Datasets
- [LJSpeech][ljspeech]
- [VCTK][vctk]
- [LibriTTS][libritts_tts]
### Supported Models
- [VITS](https://arxiv.org/abs/2106.06103)
# Deployment with C++
Once you have trained a model in icefall, you may want to deploy it with C++ without Python dependencies.
Please refer to
- https://k2-fsa.github.io/icefall/model-export/export-with-torch-jit-script.html
- https://k2-fsa.github.io/icefall/model-export/export-onnx.html
- https://k2-fsa.github.io/icefall/model-export/export-ncnn.html
for how to do this.
We also provide a Colab notebook, showing you how to run a torch scripted model in [k2][k2] with C++.
Please see: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1BIGLWzS36isskMXHKcqC9ysN6pspYXs_?usp=sharing)
We provide a Colab notebook to run a pre-trained TDNN LSTM CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1kNmDXNMwREi0rZGAOIAOJo93REBuOTcd?usp=sharing)
[LibriSpeech_tdnn_lstm_ctc]: egs/librispeech/ASR/tdnn_lstm_ctc
[LibriSpeech_conformer_ctc]: egs/librispeech/ASR/conformer_ctc
[yesno]: egs/yesno/ASR
[librispeech]: egs/librispeech/ASR
[aishell]: egs/aishell/ASR
[aishell2]: egs/aishell2/ASR
[aishell4]: egs/aishell4/ASR
[timit]: egs/timit/ASR
[tedlium3]: egs/tedlium3/ASR
[gigaspeech]: egs/gigaspeech/ASR
[aidatatang_200zh]: egs/aidatatang_200zh/ASR
[wenetspeech]: egs/wenetspeech/ASR
[alimeeting]: egs/alimeeting/ASR
[tal_csasr]: egs/tal_csasr/ASR
[ami]: egs/ami
[swbd]: egs/swbd/ASR
[k2]: https://github.com/k2-fsa/k2
[commonvoice]: egs/commonvoice/ASR
[csj]: egs/csj/ASR
[libricss]: egs/libricss/SURT
[libritts_asr]: egs/libritts/ASR
[libriheavy]: egs/libriheavy/ASR
[mgb2]: egs/mgb2/ASR
[spgispeech]: egs/spgispeech/ASR
[voxpopuli]: egs/voxpopuli/ASR
[xbmu-amdo31]: egs/xbmu-amdo31/ASR
[vctk]: egs/vctk/TTS
[ljspeech]: egs/ljspeech/TTS
[libritts_tts]: egs/libritts/TTS
## Acknowledgements
Some contributors to this project were supported by Xiaomi Corporation. Others were supported by National Science Foundation CCRI award 2120435. This is not an exhaustive list of sources of support.

View File

@ -1,37 +1,39 @@
# Contributing to Our Project
Thank you for your interest in contributing to our project! We use Git pre-commit hooks to ensure code quality and consistency. Before contributing, please follow these guidelines to enable and use the pre-commit hooks.
## Pre-commit hooks
## Pre-Commit Hooks
We use [git][git] [pre-commit][pre-commit] [hooks][hooks] to check that files
going to be committed:
We have set up pre-commit hooks to check that the files you're committing meet our coding and formatting standards. These checks include:
- contain no trailing spaces
- are formatted with [black][black]
- are compatible with [PEP8][PEP8] (checked by [flake8][flake8])
- end in a newline and only a newline
- contain sorted `imports` (checked by [isort][isort])
- Ensuring there are no trailing spaces.
- Formatting code with [black](https://github.com/psf/black).
- Checking compliance with PEP8 using [flake8](https://flake8.pycqa.org/).
- Verifying that files end with a newline character (and only a newline).
- Sorting imports using [isort](https://pycqa.github.io/isort/).
These hooks are disabled by default. Please use the following commands to enable them:
Please note that these hooks are disabled by default. To enable them, follow these steps:
```bash
pip install pre-commit # run it only once
pre-commit install # run it only once, it will install all hooks
### Installation (Run only once)
# modify some files
git add <some files>
git commit # It runs all hooks automatically.
1. Install the `pre-commit` package using pip:
```bash
pip install pre-commit
```
2. Install the Git hooks using:
```bash
pre-commit install
```
### Making a Commit
Once you have enabled the pre-commit hooks, follow these steps when making a commit:
1. Make your changes to the codebase.
2. Stage your changes by using git add for the files you modified.
3. Commit your changes using git commit. The pre-commit hooks will run automatically at this point.
4. If all hooks run successfully, you can write your commit message, and your changes will be successfully committed.
5. If any hook fails, your commit will not be successful. Please read and follow the error messages provided, make the necessary changes, and then re-run git add and git commit.
# If all hooks run successfully, you can write the commit message now. Done!
#
# If any hook failed, your commit was not successful.
# Please read the error messages and make changes accordingly.
# And rerun
### Your Contribution
Your contributions are valuable to us, and by following these guidelines, you help maintain code consistency and quality in our project. We appreciate your dedication to ensuring high-quality code. If you have questions or need assistance, feel free to reach out to us. Thank you for being part of our open-source community!
git add <some files>
git commit
```
[git]: https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks
[flake8]: https://github.com/PyCQA/flake8
[PEP8]: https://www.python.org/dev/peps/pep-0008/
[black]: https://github.com/psf/black
[hooks]: https://github.com/pre-commit/pre-commit-hooks
[pre-commit]: https://github.com/pre-commit/pre-commit
[isort]: https://github.com/PyCQA/isort

View File

@ -1,129 +0,0 @@
# icefall dockerfile
## Download from dockerhub
You can find pre-built Docker images for icefall at the following address:
<https://hub.docker.com/r/k2fsa/icefall/tags>
Example usage:
```bash
docker run --gpus all --rm -it k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash
```
## Build from dockerfile
Two sets of configurations are provided: (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8, and (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8.
Otherwise, since the older PyTorch images are not updated with the [apt-key rotation by NVIDIA](https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key), you have to go for case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8. Ensure that your NVIDIA driver supports at least CUDA 11.0.
You can check the highest CUDA version that your NVIDIA driver supports with the `nvidia-smi` command below. In this example, the highest CUDA version is 11.0, i.e. case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
```bash
$ nvidia-smi
Tue Sep 20 00:26:13 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.119.03 Driver Version: 450.119.03 CUDA Version: 11.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 TITAN RTX On | 00000000:03:00.0 Off | N/A |
| 41% 31C P8 4W / 280W | 16MiB / 24219MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
| 1 TITAN RTX On | 00000000:04:00.0 Off | N/A |
| 41% 30C P8 11W / 280W | 6MiB / 24220MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 2085 G /usr/lib/xorg/Xorg 9MiB |
| 0 N/A N/A 2240 G /usr/bin/gnome-shell 4MiB |
| 1 N/A N/A 2085 G /usr/lib/xorg/Xorg 4MiB |
+-----------------------------------------------------------------------------+
```
## Building images locally
If your environment requires a proxy to access the Internet, remember to add that information to the Dockerfile directly.
For most cases, you can uncomment these lines in the Dockerfile and add in your proxy details.
```dockerfile
ENV http_proxy=http://aaa.bb.cc.net:8080 \
https_proxy=http://aaa.bb.cc.net:8080
```
Then, proceed with these commands.
### If you are case (a), i.e. your NVIDIA driver supports CUDA version >= 11.3:
```bash
cd docker/Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8
docker build -t icefall/pytorch1.12.1 .
```
### If you are case (b), i.e. your NVIDIA driver can only support CUDA versions 11.0 <= x < 11.3:
```bash
cd docker/Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8
docker build -t icefall/pytorch1.7.1 .
```
## Running your built local image
Sample usage of the GPU-based images. These commands are written with case (a) in mind, so please make the necessary changes to your image name if you are in case (b).
Note: use [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) to run the GPU images.
```bash
docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all icefall/pytorch1.12.1
```
### Tips:
1. Since your data and models most probably won't be inside the Docker container, you must use the -v flag to mount a directory from the host machine. Do this by specifying `-v {/path/in/host/machine}:{/path/in/docker}`.
2. Also, if your environment requires a proxy, this would be a good time to add it in too: `-e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080`.
Overall, your docker run command should look like this.
```bash
docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all -v {/path/in/host/machine}:{/path/in/docker} -e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080 icefall/pytorch1.12.1
```
You can explore more docker run options [here](https://docs.docker.com/engine/reference/commandline/run/) to suit your environment.
### Linking to icefall in your host machine
If you already have icefall downloaded onto your host machine, you can use that repository instead so that changes in your code are visible inside and outside of the container.
Note: Remember to set the -v flag above during the first run of the container, as that is the only way for your container to access your host machine.
Warning: Check that the icefall in your host machine is visible from within your container before proceeding to the commands below.
Use these commands once you are inside the container.
```bash
rm -r /workspace/icefall
ln -s {/path/in/docker/to/icefall} /workspace/icefall
```
## Starting another session in the same running container.
```bash
docker exec -it icefall /bin/bash
```
## Restarting a killed container that has been run before.
```bash
docker start -ai icefall
```
## Sample usage of the CPU based images:
```bash
docker run -it icefall /bin/bash
```

View File

@ -1,74 +0,0 @@
# GPU image for icefall on top of the PyTorch 1.12.1 / CUDA 11.3 / cuDNN 8
# devel image.
#
# Builds CMake and FLAC from source, installs torchaudio, k2 (from source),
# lhotse and kaldifeat, and clones icefall into /workspace/icefall.
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel

# Uncomment and edit if the build host needs a proxy to reach the Internet.
# ENV http_proxy=http://aaa.bbb.cc.net:8080 \
#     https_proxy=http://aaa.bbb.cc.net:8080

# Compilers, build tools and command-line utilities from the distro packages.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        g++ \
        make \
        automake \
        autoconf \
        bzip2 \
        unzip \
        wget \
        sox \
        libtool \
        git \
        subversion \
        zlib1g-dev \
        gfortran \
        ca-certificates \
        patch \
        ffmpeg \
        valgrind \
        libssl-dev \
        vim \
        curl

# cmake 3.18.0, built from source under /opt.
# The archive is removed by absolute path: at that point the working directory
# is the extracted source tree, so a relative `rm` would silently match nothing
# and leave the archive in the image. Object files, libtool archives and static
# libraries left behind by the build are deleted as well.
RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
    cd /opt && \
    tar -zxvf cmake-3.18.0.tar.gz && \
    cd cmake-3.18.0 && \
    ./bootstrap && \
    make && \
    make install && \
    rm -rf /opt/cmake-3.18.0.tar.gz && \
    find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
    cd -

# flac 1.3.2, built from source under /opt (same cleanup as for cmake;
# `xz -d` has already replaced the .tar.xz with the .tar removed below).
RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
    cd /opt && \
    xz -d flac-1.3.2.tar.xz && \
    tar -xvf flac-1.3.2.tar && \
    cd flac-1.3.2 && \
    ./configure && \
    make && make install && \
    rm -rf /opt/flac-1.3.2.tar && \
    find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
    cd -

# torchaudio from the pytorch conda channel, graphviz from PyPI.
RUN conda install -y -c pytorch torchaudio=0.12 && \
    pip install graphviz

# install k2 from source (default branch; no revision is pinned)
RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
    cd /opt/k2 && \
    python3 setup.py install && \
    cd -

# install lhotse (default branch of the git repository)
RUN pip install git+https://github.com/lhotse-speech/lhotse

# Clone icefall and install its Python requirements.
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
    cd /workspace/icefall && \
    pip install -r requirements.txt

RUN pip install kaldifeat

# Put the icefall checkout on the Python import path.
# `ENV key=value` replaces the legacy space-separated form.
ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH

WORKDIR /workspace/icefall

View File

@ -1,90 +0,0 @@
# GPU image for icefall on top of the PyTorch 1.7.1 / CUDA 11.0 / cuDNN 8
# devel image.
#
# Besides building CMake and FLAC from source and installing torchaudio, k2
# (from source) and lhotse, this file replaces the NVIDIA apt sources and
# signing keys that ship with the base image before upgrading packages.
FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel

# Uncomment and edit if the build host needs a proxy to reach the Internet.
# ENV http_proxy=http://aaa.bbb.cc.net:8080 \
#     https_proxy=http://aaa.bbb.cc.net:8080

# Drop the NVIDIA apt sources and key shipped with the base image so that the
# `apt-get update` below does not use them; they are re-added further down.
RUN rm /etc/apt/sources.list.d/cuda.list && \
    rm /etc/apt/sources.list.d/nvidia-ml.list && \
    apt-key del 7fa2af80

# Compilers, build tools and command-line utilities from the distro packages.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        g++ \
        make \
        automake \
        autoconf \
        bzip2 \
        unzip \
        wget \
        sox \
        libtool \
        git \
        subversion \
        zlib1g-dev \
        gfortran \
        ca-certificates \
        patch \
        ffmpeg \
        valgrind \
        libssl-dev \
        vim \
        curl

# Add new keys and reupdate: register the NVIDIA keys, recreate the two source
# lists, clear the cached package lists, then update and upgrade.
# The CUDA libraries bundled under /opt/conda/lib are moved aside first.
# NOTE(review): presumably so that they do not shadow the system CUDA
# libraries - confirm. The libnvrtc backup is named "11.1" although the source
# file is 11.0; kept as in the original.
RUN curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub | apt-key add - && \
    curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
    echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
    echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
    rm -rf /var/lib/apt/lists/* && \
    mv /opt/conda/lib/libcufft.so.10 /opt/libcufft.so.10.bak && \
    mv /opt/conda/lib/libcurand.so.10 /opt/libcurand.so.10.bak && \
    mv /opt/conda/lib/libcublas.so.11 /opt/libcublas.so.11.bak && \
    mv /opt/conda/lib/libnvrtc.so.11.0 /opt/libnvrtc.so.11.1.bak && \
    # mv /opt/conda/lib/libnvToolsExt.so.1 /opt/libnvToolsExt.so.1.bak && \
    mv /opt/conda/lib/libcudart.so.11.0 /opt/libcudart.so.11.0.bak && \
    apt-get update && apt-get -y upgrade

# cmake 3.18.0, built from source under /opt.
# The archive is removed by absolute path: at that point the working directory
# is the extracted source tree, so a relative `rm` would silently match nothing
# and leave the archive in the image. Object files, libtool archives and static
# libraries left behind by the build are deleted as well.
RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
    cd /opt && \
    tar -zxvf cmake-3.18.0.tar.gz && \
    cd cmake-3.18.0 && \
    ./bootstrap && \
    make && \
    make install && \
    rm -rf /opt/cmake-3.18.0.tar.gz && \
    find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
    cd -

# flac 1.3.2, built from source under /opt (same cleanup as for cmake;
# `xz -d` has already replaced the .tar.xz with the .tar removed below).
RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
    cd /opt && \
    xz -d flac-1.3.2.tar.xz && \
    tar -xvf flac-1.3.2.tar && \
    cd flac-1.3.2 && \
    ./configure && \
    make && make install && \
    rm -rf /opt/flac-1.3.2.tar && \
    find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
    cd -

# torchaudio from the pytorch conda channel, graphviz from PyPI.
RUN conda install -y -c pytorch torchaudio=0.7.1 && \
    pip install graphviz

# install k2 from source (default branch; no revision is pinned)
RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
    cd /opt/k2 && \
    python3 setup.py install && \
    cd -

# install lhotse (default branch of the git repository)
RUN pip install git+https://github.com/lhotse-speech/lhotse

# Clone icefall and install its Python requirements.
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
    cd /workspace/icefall && \
    pip install -r requirements.txt

# Put the icefall checkout on the Python import path.
# `ENV key=value` replaces the legacy space-separated form.
ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH

WORKDIR /workspace/icefall

View File

@ -1,72 +0,0 @@
FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.7
ARG K2_VERSION="1.24.4.dev20240223+cuda11.3.torch1.12.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.3.torch1.12.1"
ARG TORCHAUDIO_VERSION="0.12.1+cu113"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,74 +0,0 @@
FROM pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.9
ARG K2_VERSION="1.24.4.dev20240223+cuda11.6.torch1.13.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.6.torch1.13.0"
ARG TORCHAUDIO_VERSION="0.13.0+cu116"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
ENV LD_LIBRARY_PATH /opt/conda/lib/stubs:$LD_LIBRARY_PATH
WORKDIR /workspace/icefall

View File

@ -1,88 +0,0 @@
FROM pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.7
ARG K2_VERSION="1.24.4.dev20240223+cuda10.2.torch1.9.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda10.2.torch1.9.0"
ARG TORCHAUDIO_VERSION="0.9.0"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
# see https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/
RUN rm /etc/apt/sources.list.d/cuda.list && \
rm /etc/apt/sources.list.d/nvidia-ml.list && \
apt-key del 7fa2af80
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
rm -v cuda-keyring_1.0-1_all.deb && \
apt-get update && \
rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# The preinstalled tqdm is removed first and reinstalled with a minimum version.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=4.63.0` / `>=0.1.96` is parsed as an output redirection to a file,
# so the version constraints would never reach pip.
RUN pip uninstall -y tqdm && \
    pip install -U --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz \
      "tqdm>=4.63.0"
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda11.7.torch2.0.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.7.torch2.0.0"
ARG TORCHAUDIO_VERSION="2.0.0+cu117"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda11.8.torch2.1.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.8.torch2.1.0"
ARG TORCHAUDIO_VERSION="2.1.0+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda12.1.torch2.1.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda12.1.torch2.1.0"
ARG TORCHAUDIO_VERSION="2.1.0+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.2.0-cuda11.8-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda11.8.torch2.2.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.8.torch2.2.0"
ARG TORCHAUDIO_VERSION="2.2.0+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda12.1.torch2.2.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda12.1.torch2.2.0"
ARG TORCHAUDIO_VERSION="2.2.0+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.2.1-cuda11.8-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda11.8.torch2.2.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.8.torch2.2.1"
ARG TORCHAUDIO_VERSION="2.2.1+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.2.1-cuda12.1-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240223+cuda12.1.torch2.2.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda12.1.torch2.2.1"
ARG TORCHAUDIO_VERSION="2.2.1+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.2.2-cuda11.8-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240328+cuda11.8.torch2.2.2"
ARG KALDIFEAT_VERSION="1.25.4.dev20240329+cuda11.8.torch2.2.2"
ARG TORCHAUDIO_VERSION="2.2.2+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240328+cuda12.1.torch2.2.2"
ARG KALDIFEAT_VERSION="1.25.4.dev20240329+cuda12.1.torch2.2.2"
ARG TORCHAUDIO_VERSION="2.2.2+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240606+cuda11.8.torch2.3.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240606+cuda11.8.torch2.3.1"
ARG TORCHAUDIO_VERSION="2.3.1+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240606+cuda12.1.torch2.3.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240606+cuda12.1.torch2.3.1"
ARG TORCHAUDIO_VERSION="2.3.1+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.4.0-cuda11.8-cudnn9-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240725+cuda11.8.torch2.4.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240725+cuda11.8.torch2.4.0"
ARG TORCHAUDIO_VERSION="2.4.0+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240725+cuda12.1.torch2.4.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240725+cuda12.1.torch2.4.0"
ARG TORCHAUDIO_VERSION="2.4.0+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240725+cuda12.4.torch2.4.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240725+cuda12.4.torch2.4.0"
ARG TORCHAUDIO_VERSION="2.4.0+cu124"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.4.1-cuda11.8-cudnn9-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240905+cuda11.8.torch2.4.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda11.8.torch2.4.1"
ARG TORCHAUDIO_VERSION="2.4.1+cu118"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.4.1-cuda12.1-cudnn9-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240905+cuda12.1.torch2.4.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.1.torch2.4.1"
ARG TORCHAUDIO_VERSION="2.4.1+cu121"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,73 +0,0 @@
FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-devel
# python 3.10
ENV LC_ALL C.UTF-8
ARG DEBIAN_FRONTEND=noninteractive
# python 3.10
ARG K2_VERSION="1.24.4.dev20240905+cuda12.4.torch2.4.1"
ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.4.torch2.4.1"
ARG TORCHAUDIO_VERSION="2.4.1+cu124"
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"
RUN apt-get update && \
apt-get install -y --no-install-recommends \
curl \
vim \
libssl-dev \
autoconf \
automake \
bzip2 \
ca-certificates \
ffmpeg \
g++ \
gfortran \
git \
libtool \
make \
patch \
sox \
subversion \
unzip \
valgrind \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# Specifiers containing '>' must be quoted: RUN uses the shell form, where an
# unquoted `>=0.1.96` is parsed as an output redirection to a file named
# "=0.1.96", so the version constraint would never reach pip.
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      onnxoptimizer \
      onnxsim \
      multi_quantization \
      numpy \
      pytest \
      graphviz
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
cd /workspace/icefall && \
pip install --no-cache-dir -r requirements.txt
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
WORKDIR /workspace/icefall

View File

@ -1,24 +0,0 @@
## Usage
```bash
cd /path/to/icefall/docs
pip install -r requirements.txt
make clean
make html
cd build/html
python3 -m http.server 8000
```
It prints:
```
Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
```
Open your browser and go to <http://0.0.0.0:8000/> to view the generated
documentation.
Done!
**Hint**: You can change the port number when starting the server.

View File

@ -1,3 +1,2 @@
sphinx_rtd_theme
sphinx
sphinxcontrib-youtube==1.1.0

View File

@ -16,6 +16,7 @@
import sphinx_rtd_theme
# -- Project information -----------------------------------------------------
project = "icefall"
@ -32,9 +33,7 @@ release = "0.1"
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.todo",
"sphinx_rtd_theme",
"sphinxcontrib.youtube",
]
# Add any paths that contain templates here, relative to this directory.
@ -74,30 +73,5 @@ html_context = {
"github_user": "k2-fsa",
"github_repo": "icefall",
"github_version": "master",
"conf_py_path": "/docs/source/",
"conf_py_path": "/icefall/docs/source/",
}
todo_include_todos = True
rst_epilog = """
.. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn
.. _sherpa-onnx: https://github.com/k2-fsa/sherpa-onnx
.. _icefall: https://github.com/k2-fsa/icefall
.. _git-lfs: https://git-lfs.com/
.. _ncnn: https://github.com/tencent/ncnn
.. _LibriSpeech: https://www.openslr.org/12
.. _Gigaspeech: https://github.com/SpeechColab/GigaSpeech
.. _musan: http://www.openslr.org/17/
.. _ONNX: https://github.com/onnx/onnx
.. _onnxruntime: https://github.com/microsoft/onnxruntime
.. _torch: https://github.com/pytorch/pytorch
.. _torchaudio: https://github.com/pytorch/audio
.. _k2: https://github.com/k2-fsa/k2
.. _lhotse: https://github.com/lhotse-speech/lhotse
.. _yesno: https://www.openslr.org/1/
.. _Next-gen Kaldi: https://github.com/k2-fsa
.. _Kaldi: https://github.com/kaldi-asr/kaldi
.. _lilcom: https://github.com/danpovey/lilcom
.. _CTC: https://www.cs.toronto.edu/~graves/icml_2006.pdf
.. _kaldi-decoder: https://github.com/k2-fsa/kaldi-decoder
"""

View File

@ -11,9 +11,9 @@ We use the following tools to make the code style to be as consistent as possibl
The following versions of the above tools are used:
- ``black == 22.3.0``
- ``flake8 == 5.0.4``
- ``isort == 5.10.1``
- ``black == 21.6b0``
- ``flake8 == 3.9.2``
- ``isort == 5.9.2``
After running the following commands:
@ -38,7 +38,7 @@ Please fix any issues reported by the check tools.
.. HINT::
Some of the check tools, i.e., ``black`` and ``isort`` will modify
the files to be committed **in-place**. So please run ``git status``
the files to be commited **in-place**. So please run ``git status``
after failure to see which file has been modified by the tools
before you make any further changes.
@ -54,17 +54,10 @@ it should succeed this time:
If you want to check the style of your code before ``git commit``, you
can do the following:
.. code-block:: bash
$ pre-commit install
$ pre-commit run
Or without installing the pre-commit hooks:
.. code-block:: bash
$ cd icefall
$ pip install black==22.3.0 flake8==5.0.4 isort==5.10.1
$ pip install black==21.6b0 flake8==3.9.2 isort==5.9.2
$ black --check your_changed_file.py
$ black your_changed_file.py # modify it in-place
$

View File

@ -3,7 +3,7 @@ How to create a recipe
.. HINT::
Please read :ref:`follow the code style` to adjust your code style.
Please read :ref:`follow the code style` to adjust your code sytle.
.. CAUTION::

View File

@ -1,187 +0,0 @@
.. _LODR:
LODR for RNN Transducer
=======================
As a type of E2E model, neural transducers are usually considered as having an internal
language model, which learns the language level information on the training corpus.
In real-life scenarios, there is often a mismatch between the training corpus and the target corpus space.
This mismatch can be a problem when decoding neural transducer models with external language models, as the internal
language model can act "against" the external LM. In this tutorial, we show how to use
`Low-order Density Ratio <https://arxiv.org/abs/2203.16776>`_ to alleviate this effect and further improve the performance
of language model integration.
.. note::
This tutorial is based on the recipe
`pruned_transducer_stateless7_streaming <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_,
which is a streaming transducer model trained on `LibriSpeech`_.
However, you can easily apply LODR to other recipes.
If you encounter any problems, please open an issue here `icefall <https://github.com/k2-fsa/icefall/issues>`__.
.. note::
For simplicity, the training and testing corpus in this tutorial are the same (`LibriSpeech`_). However,
you can change the testing set to any other domain (e.g., `GigaSpeech`_) and prepare the language models
using that corpus.
First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) was first proposed `here <https://arxiv.org/abs/2002.11268>`_
to address the language information mismatch between the training
corpus (source domain) and the testing corpus (target domain). Assuming that the source domain and the test domain
are acoustically similar, DR derives the following formula for decoding with Bayes' theorem:
.. math::
\text{score}\left(y_u|\mathit{x},y\right) =
\log p\left(y_u|\mathit{x},y_{1:u-1}\right) +
\lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -
\lambda_2 \log p_{\text{Source LM}}\left(y_u|\mathit{x},y_{1:u-1}\right)
where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively.
Here, the source domain LM is trained on the training corpus. The only difference in the above formula compared to
shallow fusion is the subtraction of the source domain LM.
Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is
considered to be weak and can only capture low-level language information. Therefore, `LODR <https://arxiv.org/abs/2203.16776>`__ proposed to use
a low-order n-gram LM as an approximation of the ILM of the neural transducer. This leads to the following formula
during decoding for transducer model:
.. math::
\text{score}\left(y_u|\mathit{x},y\right) =
\log p_{rnnt}\left(y_u|\mathit{x},y_{1:u-1}\right) +
\lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -
\lambda_2 \log p_{\text{bi-gram}}\left(y_u|\mathit{x},y_{1:u-1}\right)
In LODR, an additional bi-gram LM estimated on the source domain (e.g., the training corpus) is required. Compared to DR,
the only difference lies in the choice of source domain LM. According to the original `paper <https://arxiv.org/abs/2203.16776>`_,
LODR achieves similar performance compared to DR in both intra-domain and cross-domain settings.
As a bi-gram is much faster to evaluate, LODR is usually much faster.
Now, we will show you how to use LODR in ``icefall``.
For illustration purposes, we will use a pre-trained ASR model from this `link <https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_.
If you want to train your model from scratch, please have a look at :ref:`non_streaming_librispeech_pruned_transducer_stateless`.
The testing scenario here is intra-domain (we decode the model trained on `LibriSpeech`_ on `LibriSpeech`_ testing sets).
As the initial step, let's download the pre-trained model.
.. code-block:: bash
$ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
$ cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
$ git lfs pull --include "pretrained.pt"
$ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded
$ cd ../data/lang_bpe_500
$ git lfs pull --include bpe.model
$ cd ../../..
To test the model, let's have a look at the decoding results **without** using LM. This can be done via the following command:
.. code-block:: bash
$ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/
$ ./pruned_transducer_stateless7_streaming/decode.py \
--epoch 99 \
--avg 1 \
--use-averaged-model False \
--exp-dir $exp_dir \
--bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
--max-duration 600 \
--decode-chunk-len 32 \
--decoding-method modified_beam_search
The following WERs are achieved on test-clean and test-other:
.. code-block:: text
$ For test-clean, WER of different settings are:
$ beam_size_4 3.11 best for test-clean
$ For test-other, WER of different settings are:
$ beam_size_4 7.93 best for test-other
Then, we download the external language model and bi-gram LM that are necessary for LODR.
Note that the bi-gram is estimated on the LibriSpeech 960 hours' text.
.. code-block:: bash
$ # download the external LM
$ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
$ # create a symbolic link so that the checkpoint can be loaded
$ pushd icefall-librispeech-rnn-lm/exp
$ git lfs pull --include "pretrained.pt"
$ ln -s pretrained.pt epoch-99.pt
$ popd
$
$ # download the bi-gram
$ git lfs install
$ git clone https://huggingface.co/marcoyang/librispeech_bigram
$ pushd data/lang_bpe_500
$ ln -s ../../librispeech_bigram/2gram.fst.txt .
$ popd
Then, we perform LODR decoding by setting ``--decoding-method`` to ``modified_beam_search_LODR``:
.. code-block:: bash
$ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
$ lm_dir=./icefall-librispeech-rnn-lm/exp
$ lm_scale=0.42
$ LODR_scale=-0.24
$ ./pruned_transducer_stateless7_streaming/decode.py \
--epoch 99 \
--avg 1 \
--use-averaged-model False \
--beam-size 4 \
--exp-dir $exp_dir \
--max-duration 600 \
--decode-chunk-len 32 \
--decoding-method modified_beam_search_LODR \
--bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
--use-shallow-fusion 1 \
--lm-type rnn \
--lm-exp-dir $lm_dir \
--lm-epoch 99 \
--lm-scale $lm_scale \
--lm-avg 1 \
--rnn-lm-embedding-dim 2048 \
--rnn-lm-hidden-dim 2048 \
--rnn-lm-num-layers 3 \
--lm-vocab-size 500 \
--tokens-ngram 2 \
--ngram-lm-scale $LODR_scale
There are two extra arguments that need to be given when doing LODR. ``--tokens-ngram`` specifies the order of the n-gram. As we
are using a bi-gram, we set it to 2. ``--ngram-lm-scale`` is the scale of the bi-gram; it should be a negative number
as we are subtracting the bi-gram's score during decoding.
The decoding results obtained with the above command are shown below:
.. code-block:: text
$ For test-clean, WER of different settings are:
$ beam_size_4 2.61 best for test-clean
$ For test-other, WER of different settings are:
$ beam_size_4 6.74 best for test-other
Recall that the lowest WER we obtained in :ref:`shallow_fusion` with a beam size of 4 is ``2.77/7.08``; LODR
indeed **further improves** the WER. We can do even better if we increase ``--beam-size``:
.. list-table:: WER of LODR with different beam sizes
:widths: 25 25 50
:header-rows: 1
* - Beam size
- test-clean
- test-other
* - 4
- 2.61
- 6.74
* - 8
- 2.45
- 6.38
* - 12
- 2.4
- 6.23

Some files were not shown because too many files have changed in this diff Show More