diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile
index cf0523401..1b6d0026f 100644
--- a/.github/scripts/docker/Dockerfile
+++ b/.github/scripts/docker/Dockerfile
@@ -55,9 +55,9 @@ RUN pip install --no-cache-dir \
   "numpy<2.0" \
   onnxoptimizer \
   onnxsim \
-  onnx \
+  onnx==1.17.0 \
   onnxmltools \
-  onnxruntime \
+  onnxruntime==1.17.1 \
   piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
   pypinyin==0.50.0 \
   pytest \
diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py
index 638e19498..7f36e278d 100755
--- a/.github/scripts/docker/generate_build_matrix.py
+++ b/.github/scripts/docker/generate_build_matrix.py
@@ -63,23 +63,24 @@ def get_torchaudio_version(torch_version):
 def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
-    k2_version = "1.24.4.dev20241029"
-    kaldifeat_version = "1.25.5.dev20241029"
-    version = "20241218"
+    k2_version = "1.24.4.dev20250630"
+    kaldifeat_version = "1.25.5.dev20250630"
+    version = "20250630"
 
     # torchaudio 2.5.0 does not support python 3.13
-    python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
+    python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
     torch_version = []
     torch_version += ["1.13.0", "1.13.1"]
     torch_version += ["2.0.0", "2.0.1"]
-    # torch_version += ["2.1.0", "2.1.1", "2.1.2"]
-    # torch_version += ["2.2.0", "2.2.1", "2.2.2"]
+    torch_version += ["2.1.0", "2.1.1", "2.1.2"]
+    torch_version += ["2.2.0", "2.2.1", "2.2.2"]
     # Test only torch >= 2.3.0
     torch_version += ["2.3.0", "2.3.1"]
     torch_version += ["2.4.0"]
     torch_version += ["2.4.1"]
     torch_version += ["2.5.0"]
     torch_version += ["2.5.1"]
+    torch_version += ["2.6.0", "2.7.0", "2.7.1"]
 
     if specified_torch_version:
         torch_version = [specified_torch_version]
@@ -109,12 +110,8 @@ def get_matrix(min_torch_version, specified_torch_version, specified_python_vers
                 # torch>=2.5 requires python 3.10
                 continue
 
-            if t == "2.5.1":
-                k2_version_2 = "1.24.4.dev20241122"
-                kaldifeat_version_2 = "1.25.5.dev20241126"
-            else:
-                k2_version_2 = k2_version
-                kaldifeat_version_2 = kaldifeat_version
+            k2_version_2 = k2_version
+            kaldifeat_version_2 = kaldifeat_version
 
             matrix.append(
                 {
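The CI images now pin onnx==1.17.0 and onnxruntime==1.17.1 so the exporters below behave reproducibly, and the build matrix grows to torch 1.13–2.7 and Python 3.8–3.13 with a single k2/kaldifeat wheel date for all torch versions. The matrix is essentially a cross-product with compatibility filters; a minimal sketch of the idea (the key names are illustrative, not the script's exact output):

```python
# Sketch of the matrix generation above; the real script also filters
# incompatible pairs (e.g. torch>=2.5 needs python>=3.10) and prints JSON
# for GitHub Actions. Field names here are illustrative.
import itertools
import json

python_versions = ["3.10", "3.11", "3.12", "3.13"]
torch_versions = ["2.6.0", "2.7.0", "2.7.1"]

matrix = []
for p, t in itertools.product(python_versions, torch_versions):
    matrix.append(
        {
            "python-version": p,
            "torch-version": t,
            "k2-version": "1.24.4.dev20250630",
            "kaldifeat-version": "1.25.5.dev20250630",
        }
    )

# GitHub Actions consumes the matrix as a single JSON line.
print(json.dumps({"include": matrix}))
```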
diff --git a/.github/scripts/multi-zh-hans.sh b/.github/scripts/multi-zh-hans.sh
deleted file mode 100755
index e254419ff..000000000
--- a/.github/scripts/multi-zh-hans.sh
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-git config --global user.name "k2-fsa"
-git config --global user.email "csukuangfj@gmail.com"
-git config --global lfs.allowincompletepush true
-
-log() {
-  # This function is from espnet
-  local fname=${BASH_SOURCE[1]##*/}
-  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
-}
-
-log "pwd: $PWD"
-
-cd egs/multi_zh-hans/ASR
-
-repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
-log "Downloading pre-trained model from $repo_url"
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
-pushd $repo
-cd exp
-git lfs pull --include pretrained.pt
-ln -s pretrained.pt epoch-99.pt
-cd ../data/lang_bpe_2000
-ls -lh
-git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
-git lfs pull --include "*.model"
-ls -lh
-popd
-
-log "--------------------------------------------"
-log "Export non-streaming ONNX transducer models "
-log "--------------------------------------------"
-./zipformer/export-onnx.py \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  --use-averaged-model 0 \
-  --epoch 99 \
-  --avg 1 \
-  --exp-dir $repo/exp \
-  --causal False
-
-ls -lh $repo/exp
-
-./zipformer/onnx_pretrained.py \
-  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
-  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
-  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  $repo/test_wavs/DEV_T0000000000.wav \
-  $repo/test_wavs/DEV_T0000000001.wav \
-  $repo/test_wavs/DEV_T0000000002.wav \
-  $repo/test_wavs/TEST_MEETING_T0000000113.wav \
-  $repo/test_wavs/TEST_MEETING_T0000000219.wav \
-  $repo/test_wavs/TEST_MEETING_T0000000351.wav
-
-rm -rf $repo
-
-repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
-log "Downloading pre-trained model from $repo_url"
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
-
-pushd $repo
-cd exp/
-git lfs pull --include pretrained.pt
-rm -fv epoch-20.pt
-rm -fv *.onnx
-ln -s pretrained.pt epoch-20.pt
-cd ../data/lang_bpe_2000
-ls -lh
-git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
-git lfs pull --include "*.model"
-ls -lh
-popd
-
-log "----------------------------------------"
-log "Export streaming ONNX CTC models "
-log "----------------------------------------"
-./zipformer/export-onnx-streaming-ctc.py \
-  --exp-dir $repo/exp \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  --causal 1 \
-  --avg 1 \
-  --epoch 20 \
-  --use-averaged-model 0 \
-  --chunk-size 16 \
-  --left-context-frames 128 \
-  --use-ctc 1
-
-ls -lh $repo/exp/
-
-log "------------------------------------------------------------"
-log "Test exported streaming ONNX CTC models (greedy search) "
-log "------------------------------------------------------------"
-
-test_wavs=(
-DEV_T0000000000.wav
-DEV_T0000000001.wav
-DEV_T0000000002.wav
-TEST_MEETING_T0000000113.wav
-TEST_MEETING_T0000000219.wav
-TEST_MEETING_T0000000351.wav
-)
-
-for w in ${test_wavs[@]}; do
-  ./zipformer/onnx_pretrained-streaming-ctc.py \
-    --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
-    --tokens $repo/data/lang_bpe_2000/tokens.txt \
-    $repo/test_wavs/$w
-done
-
-log "Upload onnx CTC models to huggingface"
-url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
-GIT_LFS_SKIP_SMUDGE=1 git clone $url
-dst=$(basename $url)
-cp -v $repo/exp/ctc*.onnx $dst
-cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
-cp -v $repo/data/lang_bpe_2000/bpe.model $dst
-mkdir -p $dst/test_wavs
-cp -v $repo/test_wavs/*.wav $dst/test_wavs
-cd $dst
-git lfs track "*.onnx" "bpe.model"
-ls -lh
-file bpe.model
-git status
-git add .
-git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
-
-log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
-rm -rf .git
-rm -fv .gitattributes
-cd ..
-tar cjfv $dst.tar.bz2 $dst
-ls -lh *.tar.bz2
-mv -v $dst.tar.bz2 ../../../
-
-log "----------------------------------------"
-log "Export streaming ONNX transducer models "
-log "----------------------------------------"
-
-./zipformer/export-onnx-streaming.py \
-  --exp-dir $repo/exp \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  --causal 1 \
-  --avg 1 \
-  --epoch 20 \
-  --use-averaged-model 0 \
-  --chunk-size 16 \
-  --left-context-frames 128 \
-  --use-ctc 0
-
-ls -lh $repo/exp
-
-log "------------------------------------------------------------"
-log "Test exported streaming ONNX transducer models (Python code)"
-log "------------------------------------------------------------"
-
-log "test fp32"
-./zipformer/onnx_pretrained-streaming.py \
-  --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  $repo/test_wavs/DEV_T0000000000.wav
-
-log "test int8"
-./zipformer/onnx_pretrained-streaming.py \
-  --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
-  --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  $repo/test_wavs/DEV_T0000000000.wav
-
-log "Upload onnx transducer models to huggingface"
-
-url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
-GIT_LFS_SKIP_SMUDGE=1 git clone $url
-dst=$(basename $url)
-cp -v $repo/exp/encoder*.onnx $dst
-cp -v $repo/exp/decoder*.onnx $dst
-cp -v $repo/exp/joiner*.onnx $dst
-cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
-cp -v $repo/data/lang_bpe_2000/bpe.model $dst
-mkdir -p $dst/test_wavs
-cp -v $repo/test_wavs/*.wav $dst/test_wavs
-cd $dst
-git lfs track "*.onnx" bpe.model
-git add .
-git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
-
-log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
-rm -rf .git
-rm -fv .gitattributes
-cd ..
-tar cjfv $dst.tar.bz2 $dst
-ls -lh *.tar.bz2
-mv -v $dst.tar.bz2 ../../../
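The deleted one-shot script above is superseded by the function-per-model run.sh below. Its clone/copy/commit/push dance for Hugging Face uploads also has a programmatic equivalent; a minimal sketch, assuming the huggingface_hub package and an HF_TOKEN environment variable (this is not what the CI script itself does):

```python
# Programmatic equivalent of the GIT_LFS_SKIP_SMUDGE clone + cp + git push
# sequence above. The repo_id/folder names are illustrative.
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
api.upload_folder(
    repo_id="k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13",
    folder_path="sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13",
    commit_message="upload model",
)
```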
diff --git a/.github/scripts/multi_zh-hans/ASR/run.sh b/.github/scripts/multi_zh-hans/ASR/run.sh
new file mode 100755
index 000000000..345b64cf0
--- /dev/null
+++ b/.github/scripts/multi_zh-hans/ASR/run.sh
@@ -0,0 +1,756 @@
+#!/usr/bin/env bash
+
+set -ex
+
+git config --global user.name "k2-fsa"
+git config --global user.email "csukuangfj@gmail.com"
+git config --global lfs.allowincompletepush true
+
+python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+cd egs/multi_zh-hans/ASR
+
+log "pwd: $PWD"
+
+function run_2023_9_2() {
+  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
+  log "Downloading pre-trained model from $repo_url"
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  repo=$(basename $repo_url)
+  pushd $repo
+  cd exp
+  git lfs pull --include pretrained.pt
+  ln -s pretrained.pt epoch-99.pt
+  cd ../data/lang_bpe_2000
+  ls -lh
+  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
+  git lfs pull --include "*.model"
+  ls -lh
+  popd
+
+  log "--------------------------------------------"
+  log "Export non-streaming ONNX transducer models "
+  log "--------------------------------------------"
+  ./zipformer/export-onnx.py \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    --use-averaged-model 0 \
+    --epoch 99 \
+    --avg 1 \
+    --exp-dir $repo/exp \
+    --causal False \
+    --fp16 1
+
+  ls -lh $repo/exp
+
+  ./zipformer/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav \
+    $repo/test_wavs/DEV_T0000000001.wav \
+    $repo/test_wavs/DEV_T0000000002.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000351.wav
+
+  ./zipformer/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.int8.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.int8.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav \
+    $repo/test_wavs/DEV_T0000000001.wav \
+    $repo/test_wavs/DEV_T0000000002.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000351.wav
+
+  ./zipformer/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.fp16.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.fp16.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.fp16.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav \
+    $repo/test_wavs/DEV_T0000000001.wav \
+    $repo/test_wavs/DEV_T0000000002.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000351.wav
+
+  rm -rf $repo
+}
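With --fp16 1, export-onnx.py now emits three variants per component: fp32 (encoder-epoch-99-avg-1.onnx), int8 (.int8.onnx), and fp16 (.fp16.onnx). A quick sanity check that an exported file loads and exposes the expected interface, assuming onnxruntime and an illustrative encoder path:

```python
# Minimal smoke test for an exported model file; the path is illustrative.
import onnxruntime as ort

sess = ort.InferenceSession(
    "icefall-asr-multi-zh-hans-zipformer-2023-9-2/exp/encoder-epoch-99-avg-1.onnx",
    providers=["CPUExecutionProvider"],
)

# Inspect the graph interface instead of hard-coding tensor names.
for i in sess.get_inputs():
    print("input :", i.name, i.shape, i.type)
for o in sess.get_outputs():
    print("output:", o.name, o.shape, o.type)
```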
+
+function run_2023_11_05_streaming() {
+  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
+  log "Downloading pre-trained model from $repo_url"
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  repo=$(basename $repo_url)
+
+  pushd $repo
+  cd exp/
+  git lfs pull --include pretrained.pt
+  rm -fv epoch-20.pt
+  rm -fv *.onnx
+  ln -s pretrained.pt epoch-20.pt
+  cd ../data/lang_bpe_2000
+  ls -lh
+  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
+  git lfs pull --include "*.model"
+  ls -lh
+  popd
+
+  log "----------------------------------------"
+  log "Export streaming ONNX CTC models "
+  log "----------------------------------------"
+  ./zipformer/export-onnx-streaming-ctc.py \
+    --exp-dir $repo/exp \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    --causal 1 \
+    --avg 1 \
+    --epoch 20 \
+    --use-averaged-model 0 \
+    --chunk-size 16 \
+    --left-context-frames 128 \
+    --use-ctc 1 \
+    --fp16 1
+
+  ls -lh $repo/exp/
+
+  log "------------------------------------------------------------"
+  log "Test exported streaming ONNX CTC models (greedy search) "
+  log "------------------------------------------------------------"
+
+  test_wavs=(
+  DEV_T0000000000.wav
+  DEV_T0000000001.wav
+  DEV_T0000000002.wav
+  TEST_MEETING_T0000000113.wav
+  TEST_MEETING_T0000000219.wav
+  TEST_MEETING_T0000000351.wav
+  )
+
+  for w in ${test_wavs[@]}; do
+    log "----fp32----"
+    ./zipformer/onnx_pretrained-streaming-ctc.py \
+      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
+      --tokens $repo/data/lang_bpe_2000/tokens.txt \
+      $repo/test_wavs/$w
+
+    log "----int8----"
+
+    ./zipformer/onnx_pretrained-streaming-ctc.py \
+      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
+      --tokens $repo/data/lang_bpe_2000/tokens.txt \
+      $repo/test_wavs/$w
+
+    log "----fp16----"
+
+    ./zipformer/onnx_pretrained-streaming-ctc.py \
+      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
+      --tokens $repo/data/lang_bpe_2000/tokens.txt \
+      $repo/test_wavs/$w
+  done
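Each wav is now decoded three times, once per precision; the `ls -lh` above is there to eyeball the size savings (as a rough rule, fp16 roughly halves and int8 roughly quarters the weight storage relative to fp32). The same comparison in Python, with illustrative paths:

```python
# Compare on-disk sizes of the three precision variants of the CTC model.
import os

base = "icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/exp"
fp32 = os.path.getsize(f"{base}/ctc-epoch-20-avg-1-chunk-16-left-128.onnx")
int8 = os.path.getsize(f"{base}/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx")
fp16 = os.path.getsize(f"{base}/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx")

print(f"fp32: {fp32 / 1e6:.1f} MB")
print(f"fp16: {fp16 / 1e6:.1f} MB ({fp16 / fp32:.0%} of fp32)")
print(f"int8: {int8 / 1e6:.1f} MB ({int8 / fp32:.0%} of fp32)")
```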
+
+  log "Upload onnx CTC models to huggingface"
+  name=(
+  sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
+  sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
+  sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
+  )
+  for n in ${name[@]}; do
+    url=https://huggingface.co/k2-fsa/$n
+    GIT_LFS_SKIP_SMUDGE=1 git clone $url
+    dst=$(basename $url)
+    if [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]]; then
+      cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx $dst
+    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 ]]; then
+      cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
+    elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 ]]; then
+      cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
+    fi
+
+    cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
+    cp -v $repo/data/lang_bpe_2000/bpe.model $dst
+    mkdir -p $dst/test_wavs
+    cp -v $repo/test_wavs/*.wav $dst/test_wavs
+    cd $dst
+    git lfs track "*.onnx" "bpe.model" "*.wav"
+    ls -lh
+    file bpe.model
+    git status
+    git add .
+    git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
+
+    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
+    rm -rf .git
+    rm -fv .gitattributes
+    cd ..
+    tar cjfv $dst.tar.bz2 $dst
+    ls -lh *.tar.bz2
+    mv -v $dst.tar.bz2 ../../../
+  done
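Each precision variant now gets its own Hugging Face repo and its own .tar.bz2 (the .git metadata is stripped first so the archive contains only model files). The tar step has a direct Python equivalent, shown here for reference with an illustrative directory name:

```python
# Equivalent of `tar cjfv $dst.tar.bz2 $dst` (bzip2-compressed tarball).
import tarfile

dst = "sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13"
with tarfile.open(f"{dst}.tar.bz2", "w:bz2") as tar:
    tar.add(dst)  # recursively adds the whole directory
```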
+
+  log "----------------------------------------"
+  log "Export streaming ONNX transducer models "
+  log "----------------------------------------"
+
+  ./zipformer/export-onnx-streaming.py \
+    --exp-dir $repo/exp \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    --causal 1 \
+    --avg 1 \
+    --epoch 20 \
+    --use-averaged-model 0 \
+    --chunk-size 16 \
+    --left-context-frames 128 \
+    --use-ctc 0 \
+    --fp16 1
+
+  ls -lh $repo/exp
+
+  log "------------------------------------------------------------"
+  log "Test exported streaming ONNX transducer models (Python code)"
+  log "------------------------------------------------------------"
+
+  log "test fp32"
+  ./zipformer/onnx_pretrained-streaming.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav
+
+  log "test int8"
+  ./zipformer/onnx_pretrained-streaming.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav
+
+  log "test fp16"
+  ./zipformer/onnx_pretrained-streaming.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav
+
+  name=(
+  sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
+  sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
+  sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
+  )
+
+  for n in ${name[@]}; do
+    url=https://huggingface.co/csukuangfj/$n
+    GIT_LFS_SKIP_SMUDGE=1 git clone $url
+    dst=$(basename $url)
+    if [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 ]]; then
+      cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
+      cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
+      cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx $dst
+    elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 ]]; then
+      cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
+      cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
+      cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
+    elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 ]]; then
+      cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
+      cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
+      cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
+    fi
+
+    cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
+    cp -v $repo/data/lang_bpe_2000/bpe.model $dst
+    mkdir -p $dst/test_wavs
+    cp -v $repo/test_wavs/*.wav $dst/test_wavs
+    cd $dst
+    git lfs track "*.onnx" "bpe.model" "*.wav"
+    ls -lh
+    file bpe.model
+    git status
+    git add .
+    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
+
+    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
+    rm -rf .git
+    rm -fv .gitattributes
+    cd ..
+    tar cjfv $dst.tar.bz2 $dst
+    ls -lh *.tar.bz2
+    mv -v $dst.tar.bz2 ../../../
+  done
+}
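The uploaded encoder/decoder/joiner trio is meant to be consumed by sherpa-onnx (which run.sh pip-installs above). A minimal streaming decode with its Python API; the constructor and stream calls follow the sherpa-onnx OnlineRecognizer examples, soundfile is assumed for wav reading, and the paths are illustrative:

```python
# Streaming decode of one wav with the exported transducer, via sherpa-onnx.
import sherpa_onnx
import soundfile as sf

d = "sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13"
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
    tokens=f"{d}/tokens.txt",
    encoder=f"{d}/encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
    decoder=f"{d}/decoder-epoch-20-avg-1-chunk-16-left-128.onnx",
    joiner=f"{d}/joiner-epoch-20-avg-1-chunk-16-left-128.onnx",
)

samples, sample_rate = sf.read(f"{d}/test_wavs/DEV_T0000000000.wav", dtype="float32")
stream = recognizer.create_stream()
stream.accept_waveform(sample_rate, samples)
stream.input_finished()
while recognizer.is_ready(stream):
    recognizer.decode_stream(stream)
print(recognizer.get_result(stream))
```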
+
+function run_2023_12_12_streaming() {
+  log "Upload onnx transducer models to huggingface"
+
+  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
+  GIT_LFS_SKIP_SMUDGE=1 git clone $url
+  dst=$(basename $url)
+  cp -v $repo/exp/encoder*.onnx $dst
+  cp -v $repo/exp/decoder*.onnx $dst
+  cp -v $repo/exp/joiner*.onnx $dst
+  cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
+  cp -v $repo/data/lang_bpe_2000/bpe.model $dst
+  mkdir -p $dst/test_wavs
+  cp -v $repo/test_wavs/*.wav $dst/test_wavs
+  cd $dst
+  git lfs track "*.onnx" bpe.model "*.wav"
+  git add .
+  git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
+
+  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
+  rm -rf .git
+  rm -fv .gitattributes
+  cd ..
+  tar cjfv $dst.tar.bz2 $dst
+  ls -lh *.tar.bz2
+  mv -v $dst.tar.bz2 ../../../
+}
+
+function run_yuekai_large() {
+  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
+  log "Downloading pre-trained model from $repo_url"
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  repo=$(basename $repo_url)
+  pushd $repo
+  git lfs pull --include pretrained.pt
+  mv pretrained.pt epoch-99.pt
+  curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
+  popd
+
+  log "----------------------------------------"
+  log "Export streaming ONNX CTC models "
+  log "----------------------------------------"
+  ./zipformer/export-onnx-streaming-ctc.py \
+    --exp-dir $repo/ \
+    --tokens $repo/tokens.txt \
+    --causal 1 \
+    --avg 1 \
+    --epoch 99 \
+    --use-averaged-model 0 \
+    --chunk-size 16 \
+    --left-context-frames 128 \
+    --use-ctc 1 \
+    \
+    --num-encoder-layers 2,2,4,5,4,2 \
+    --feedforward-dim 768,1024,1536,2048,1536,768 \
+    --encoder-dim 256,384,512,768,512,256 \
+    --encoder-unmasked-dim 192,192,256,320,256,192 \
+    \
+    --fp16 1 \
+    --use-whisper-features 1
+
+
+  ls -lh $repo/
+  pushd $repo
+
+cat >README.md <<EOF
[...]
diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming-ctc.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming-ctc.py
[...]
+    if use_external_data:
+        # For models file size > 2GB
+        external_filename = Path(filename).stem
+
+        onnx.save(
+            model,
+            filename,
+            save_as_external_data=True,
+            all_tensors_to_one_file=True,
+            location=external_filename + ".weights",
+        )
+    else:
+        onnx.save(model, filename)
+
+
+def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path):
+    import onnxmltools
+    from onnxmltools.utils.float16_converter import convert_float_to_float16
+
+    onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path)
+    onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True)
+    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)
+
+
+def export_onnx_fp16_large_2gb(onnx_fp32_path, onnx_fp16_path):
+    import onnxmltools
+    from onnxmltools.utils.float16_converter import convert_float_to_float16_model_path
+
+    onnx_fp16_model = convert_float_to_float16_model_path(
+        onnx_fp32_path, keep_io_types=True
+    )
+    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)
 
 
 class OnnxModel(nn.Module):
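ONNX models are protobuf files, and protobuf caps a single serialized file at 2 GB; save_as_external_data moves the raw tensors into a sibling .weights file so only the graph stays in the .onnx. Loading is transparent as long as the two files sit side by side. A small round-trip sketch with illustrative paths:

```python
# Round-trip a model through the external-data format used above.
import onnx

model = onnx.load("ctc-epoch-99-avg-1-chunk-16-left-128.onnx")

# Write the graph to .onnx and all tensors to one sibling .weights file.
onnx.save(
    model,
    "model-ext.onnx",
    save_as_external_data=True,
    all_tensors_to_one_file=True,
    location="model-ext.weights",
)

# onnx.load resolves the external tensors automatically when the
# .weights file is next to the .onnx file.
reloaded = onnx.load("model-ext.onnx")
print(len(reloaded.graph.initializer), "initializers")
```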
@@ -270,6 +326,8 @@ def export_streaming_ctc_model_onnx(
     model: OnnxModel,
     encoder_filename: str,
     opset_version: int = 11,
+    use_whisper_features: bool = False,
+    use_external_data: bool = False,
 ) -> None:
 
     model.encoder.__class__.forward = model.encoder.__class__.streaming_forward
@@ -367,6 +425,10 @@ def export_streaming_ctc_model_onnx(
         "value_head_dims": value_head_dims,
         "num_heads": num_heads,
     }
+
+    if use_whisper_features:
+        meta_data["feature"] = "whisper"
+
     logging.info(f"meta_data: {meta_data}")
 
     for i in range(len(init_state[:-2]) // 6):
@@ -411,7 +473,11 @@ def export_streaming_ctc_model_onnx(
         },
     )
 
-    add_meta_data(filename=encoder_filename, meta_data=meta_data)
+    add_meta_data(
+        filename=encoder_filename,
+        meta_data=meta_data,
+        use_external_data=use_external_data,
+    )
 
 
 @torch.no_grad()
@@ -542,11 +608,18 @@ def main():
     opset_version = 13
 
     logging.info("Exporting model")
-    model_filename = params.exp_dir / f"ctc-{suffix}.onnx"
+
+    if params.use_external_data:
+        model_filename = f"ctc-{suffix}.onnx"
+    else:
+        model_filename = params.exp_dir / f"ctc-{suffix}.onnx"
+
     export_streaming_ctc_model_onnx(
         model,
-        model_filename,
+        str(model_filename),
         opset_version=opset_version,
+        use_whisper_features=params.use_whisper_features,
+        use_external_data=params.use_external_data,
     )
     logging.info(f"Exported model to {model_filename}")
@@ -555,7 +628,11 @@ def main():
 
     logging.info("Generate int8 quantization models")
 
-    model_filename_int8 = params.exp_dir / f"ctc-{suffix}.int8.onnx"
+    if params.use_external_data:
+        model_filename_int8 = f"ctc-{suffix}.int8.onnx"
+    else:
+        model_filename_int8 = params.exp_dir / f"ctc-{suffix}.int8.onnx"
+
     quantize_dynamic(
         model_input=model_filename,
         model_output=model_filename_int8,
@@ -563,6 +640,14 @@
         weight_type=QuantType.QInt8,
     )
 
+    if params.fp16:
+        if params.use_external_data:
+            model_filename_fp16 = f"ctc-{suffix}.fp16.onnx"
+            export_onnx_fp16_large_2gb(model_filename, model_filename_fp16)
+        else:
+            model_filename_fp16 = params.exp_dir / f"ctc-{suffix}.fp16.onnx"
+            export_onnx_fp16(model_filename, model_filename_fp16)
+
 
 if __name__ == "__main__":
     formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
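Both exporters share the same two fp16 paths: convert_float_to_float16 works on an in-memory ModelProto, while convert_float_to_float16_model_path takes a file path and is the variant that copes with >2GB external-data models. In both, keep_io_types=True leaves the graph inputs/outputs in fp32 so callers do not have to feed fp16 tensors. Standalone, the conversion looks like this (paths illustrative):

```python
# fp16 conversion as done by export_onnx_fp16 above; keep_io_types=True
# keeps fp32 graph inputs/outputs while the weights become fp16.
import onnxmltools
from onnxmltools.utils.float16_converter import convert_float_to_float16

model = onnxmltools.utils.load_model("encoder-epoch-99-avg-1.onnx")
model_fp16 = convert_float_to_float16(model, keep_io_types=True)
onnxmltools.utils.save_model(model_fp16, "encoder-epoch-99-avg-1.fp16.onnx")
```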
diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
index a35eb5287..29541570b 100755
--- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
+++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
@@ -162,12 +162,47 @@ def get_parser():
         help="Whether to export models in fp16",
     )
 
+    parser.add_argument(
+        "--use-whisper-features",
+        type=str2bool,
+        default=False,
+        help="True to use whisper features. Must match the one used in training",
+    )
+
+    parser.add_argument(
+        "--use-external-data",
+        type=str2bool,
+        default=False,
+        help="Set it to true for model file size > 2GB",
+    )
+
     add_model_arguments(parser)
 
     return parser
 
 
-def add_meta_data(filename: str, meta_data: Dict[str, str]):
+def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path):
+    import onnxmltools
+    from onnxmltools.utils.float16_converter import convert_float_to_float16
+
+    onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path)
+    onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True)
+    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)
+
+
+def export_onnx_fp16_large_2gb(onnx_fp32_path, onnx_fp16_path):
+    import onnxmltools
+    from onnxmltools.utils.float16_converter import convert_float_to_float16_model_path
+
+    onnx_fp16_model = convert_float_to_float16_model_path(
+        onnx_fp32_path, keep_io_types=True
+    )
+    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)
+
+
+def add_meta_data(
+    filename: str, meta_data: Dict[str, str], use_external_data: bool = False
+):
     """Add meta data to an ONNX model. It is changed in-place.
 
     Args:
@@ -182,7 +217,19 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]):
         meta.key = key
         meta.value = value
 
-    onnx.save(model, filename)
+    if use_external_data:
+        # For models file size > 2GB
+        external_filename = Path(filename).stem
+
+        onnx.save(
+            model,
+            filename,
+            save_as_external_data=True,
+            all_tensors_to_one_file=True,
+            location=external_filename + ".weights",
+        )
+    else:
+        onnx.save(model, filename)
 
 
 class OnnxEncoder(nn.Module):
@@ -342,6 +389,8 @@ def export_encoder_model_onnx(
     encoder_filename: str,
     opset_version: int = 11,
     feature_dim: int = 80,
+    use_whisper_features: bool = False,
+    use_external_data: bool = False,
 ) -> None:
     encoder_model.encoder.__class__.forward = (
         encoder_model.encoder.__class__.streaming_forward
@@ -441,6 +490,9 @@ def export_encoder_model_onnx(
         "value_head_dims": value_head_dims,
         "num_heads": num_heads,
     }
+    if use_whisper_features:
+        meta_data["feature"] = "whisper"
+
     logging.info(f"meta_data: {meta_data}")
 
     for i in range(len(init_state[:-2]) // 6):
@@ -485,7 +537,11 @@ def export_encoder_model_onnx(
         },
     )
 
-    add_meta_data(filename=encoder_filename, meta_data=meta_data)
+    add_meta_data(
+        filename=encoder_filename,
+        meta_data=meta_data,
+        use_external_data=use_external_data,
+    )
 
 
 def export_decoder_model_onnx(
@@ -728,12 +784,17 @@ def main():
     opset_version = 13
 
     logging.info("Exporting encoder")
-    encoder_filename = params.exp_dir / f"encoder-{suffix}.onnx"
+    if params.use_external_data:
+        encoder_filename = f"encoder-{suffix}.onnx"
+    else:
+        encoder_filename = params.exp_dir / f"encoder-{suffix}.onnx"
     export_encoder_model_onnx(
         encoder,
-        encoder_filename,
+        str(encoder_filename),
        opset_version=opset_version,
         feature_dim=params.feature_dim,
+        use_whisper_features=params.use_whisper_features,
+        use_external_data=params.use_external_data,
     )
     logging.info(f"Exported encoder to {encoder_filename}")
@@ -756,31 +817,31 @@ def main():
     logging.info(f"Exported joiner to {joiner_filename}")
 
     if params.fp16:
-        from onnxconverter_common import float16
-
         logging.info("Generate fp16 models")
 
-        encoder = onnx.load(encoder_filename)
-        encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True)
-        encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx"
-        onnx.save(encoder_fp16, encoder_filename_fp16)
+        if params.use_external_data:
+            encoder_filename_fp16 = f"encoder-{suffix}.fp16.onnx"
+            export_onnx_fp16_large_2gb(encoder_filename, encoder_filename_fp16)
+        else:
+            encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx"
+            export_onnx_fp16(encoder_filename, encoder_filename_fp16)
 
-        decoder = onnx.load(decoder_filename)
-        decoder_fp16 = float16.convert_float_to_float16(decoder, keep_io_types=True)
         decoder_filename_fp16 = params.exp_dir / f"decoder-{suffix}.fp16.onnx"
-        onnx.save(decoder_fp16, decoder_filename_fp16)
+        export_onnx_fp16(decoder_filename, decoder_filename_fp16)
 
-        joiner = onnx.load(joiner_filename)
-        joiner_fp16 = float16.convert_float_to_float16(joiner, keep_io_types=True)
         joiner_filename_fp16 = params.exp_dir / f"joiner-{suffix}.fp16.onnx"
-        onnx.save(joiner_fp16, joiner_filename_fp16)
+        export_onnx_fp16(joiner_filename, joiner_filename_fp16)
 
     # Generate int8 quantization models
     # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
 
     logging.info("Generate int8 quantization models")
 
-    encoder_filename_int8 = params.exp_dir / f"encoder-{suffix}.int8.onnx"
+    if params.use_external_data:
+        encoder_filename_int8 = f"encoder-{suffix}.int8.onnx"
+    else:
+        encoder_filename_int8 = params.exp_dir / f"encoder-{suffix}.int8.onnx"
+
     quantize_dynamic(
         model_input=encoder_filename,
         model_output=encoder_filename_int8,
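int8 generation is unchanged in spirit: onnxruntime's dynamic quantization rewrites the weight initializers to int8 and dequantizes at run time, so no calibration data is needed. In isolation (paths illustrative):

```python
# Dynamic int8 quantization as invoked by the exporters above.
from onnxruntime.quantization import QuantType, quantize_dynamic

quantize_dynamic(
    model_input="encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
    model_output="encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
    weight_type=QuantType.QInt8,
)
```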
diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py
index a56a7a3e6..03c7d6f82 100755
--- a/egs/librispeech/ASR/zipformer/export-onnx.py
+++ b/egs/librispeech/ASR/zipformer/export-onnx.py
@@ -70,7 +70,6 @@ import onnx
 import torch
 import torch.nn as nn
 from decoder import Decoder
-from onnxconverter_common import float16
 from onnxruntime.quantization import QuantType, quantize_dynamic
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_model, get_params
@@ -182,6 +181,15 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]):
     onnx.save(model, filename)
 
 
+def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path):
+    import onnxmltools
+    from onnxmltools.utils.float16_converter import convert_float_to_float16
+
+    onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path)
+    onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True)
+    onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path)
+
+
 class OnnxEncoder(nn.Module):
     """A wrapper for Zipformer and the encoder_proj from the joiner"""
@@ -595,20 +603,14 @@ def main():
     if params.fp16:
         logging.info("Generate fp16 models")
 
-        encoder = onnx.load(encoder_filename)
-        encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True)
         encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx"
-        onnx.save(encoder_fp16, encoder_filename_fp16)
+        export_onnx_fp16(encoder_filename, encoder_filename_fp16)
 
-        decoder = onnx.load(decoder_filename)
-        decoder_fp16 = float16.convert_float_to_float16(decoder, keep_io_types=True)
         decoder_filename_fp16 = params.exp_dir / f"decoder-{suffix}.fp16.onnx"
-        onnx.save(decoder_fp16, decoder_filename_fp16)
+        export_onnx_fp16(decoder_filename, decoder_filename_fp16)
 
-        joiner = onnx.load(joiner_filename)
-        joiner_fp16 = float16.convert_float_to_float16(joiner, keep_io_types=True)
         joiner_filename_fp16 = params.exp_dir / f"joiner-{suffix}.fp16.onnx"
-        onnx.save(joiner_fp16, joiner_filename_fp16)
+        export_onnx_fp16(joiner_filename, joiner_filename_fp16)
 
     # Generate int8 quantization models
     # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
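Finally, the --use-whisper-features flag is recorded in the exported model as metadata (meta_data["feature"] = "whisper"), so a downstream runtime can select the matching feature extractor without out-of-band configuration. Reading it back, assuming onnxruntime and an illustrative exported encoder:

```python
# Inspect the custom metadata written by add_meta_data above.
import onnxruntime as ort

sess = ort.InferenceSession(
    "encoder-epoch-99-avg-1-chunk-16-left-128.onnx",
    providers=["CPUExecutionProvider"],
)
meta = sess.get_modelmeta().custom_metadata_map
print(meta.get("feature"))  # "whisper" when exported with --use-whisper-features 1
```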