From a318ac20c3ddca8794c236a573f38e1fb760433a Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 30 Jun 2025 11:34:50 +0800 Subject: [PATCH] export fp16 onnx models --- .github/scripts/docker/Dockerfile | 4 +- .../scripts/docker/generate_build_matrix.py | 21 +- .github/scripts/multi_zh-hans/ASR/run.sh | 602 +++++++++++++++++- .github/workflows/multi-zh-hans.yml | 22 +- .../ASR/zipformer/export-onnx-ctc.py | 24 +- .../zipformer/export-onnx-streaming-ctc.py | 84 ++- .../ASR/zipformer/export-onnx-streaming.py | 85 ++- egs/librispeech/ASR/zipformer/export-onnx.py | 22 +- 8 files changed, 768 insertions(+), 96 deletions(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index cf0523401..1b6d0026f 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -55,9 +55,9 @@ RUN pip install --no-cache-dir \ "numpy<2.0" \ onnxoptimizer \ onnxsim \ - onnx \ + onnx==1.17.0 \ onnxmltools \ - onnxruntime \ + onnxruntime==1.17.1 \ piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \ pypinyin==0.50.0 \ pytest \ diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index 638e19498..7f36e278d 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -63,23 +63,24 @@ def get_torchaudio_version(torch_version): def get_matrix(min_torch_version, specified_torch_version, specified_python_version): - k2_version = "1.24.4.dev20241029" - kaldifeat_version = "1.25.5.dev20241029" - version = "20241218" + k2_version = "1.24.4.dev20250630" + kaldifeat_version = "1.25.5.dev20250630" + version = "20250630" # torchaudio 2.5.0 does not support python 3.13 - python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] + python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] torch_version = [] torch_version += ["1.13.0", "1.13.1"] torch_version += ["2.0.0", "2.0.1"] - # torch_version += ["2.1.0", "2.1.1", "2.1.2"] - # torch_version += ["2.2.0", "2.2.1", "2.2.2"] + torch_version += ["2.1.0", "2.1.1", "2.1.2"] + torch_version += ["2.2.0", "2.2.1", "2.2.2"] # Test only torch >= 2.3.0 torch_version += ["2.3.0", "2.3.1"] torch_version += ["2.4.0"] torch_version += ["2.4.1"] torch_version += ["2.5.0"] torch_version += ["2.5.1"] + torch_version += ["2.6.0", "2.7.0", "2.7.1"] if specified_torch_version: torch_version = [specified_torch_version] @@ -109,12 +110,8 @@ def get_matrix(min_torch_version, specified_torch_version, specified_python_vers # torch>=2.5 requires python 3.10 continue - if t == "2.5.1": - k2_version_2 = "1.24.4.dev20241122" - kaldifeat_version_2 = "1.25.5.dev20241126" - else: - k2_version_2 = k2_version - kaldifeat_version_2 = kaldifeat_version + k2_version_2 = k2_version + kaldifeat_version_2 = kaldifeat_version matrix.append( { diff --git a/.github/scripts/multi_zh-hans/ASR/run.sh b/.github/scripts/multi_zh-hans/ASR/run.sh index c0b804d9e..345b64cf0 100755 --- a/.github/scripts/multi_zh-hans/ASR/run.sh +++ b/.github/scripts/multi_zh-hans/ASR/run.sh @@ -6,6 +6,8 @@ git config --global user.name "k2-fsa" git config --global user.email "csukuangfj@gmail.com" git config --global lfs.allowincompletepush true +python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx + log() { # This function is from espnet local fname=${BASH_SOURCE[1]##*/} @@ -41,7 +43,8 @@ function run_2023_9_2() { --epoch 99 \ --avg 1 \ --exp-dir $repo/exp \ - --causal False + --causal False \ + --fp16 1 ls -lh 
$repo/exp @@ -57,6 +60,30 @@ function run_2023_9_2() { $repo/test_wavs/TEST_MEETING_T0000000219.wav \ $repo/test_wavs/TEST_MEETING_T0000000351.wav + ./zipformer/onnx_pretrained.py \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.int8.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.int8.onnx \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/DEV_T0000000000.wav \ + $repo/test_wavs/DEV_T0000000001.wav \ + $repo/test_wavs/DEV_T0000000002.wav \ + $repo/test_wavs/TEST_MEETING_T0000000113.wav \ + $repo/test_wavs/TEST_MEETING_T0000000219.wav \ + $repo/test_wavs/TEST_MEETING_T0000000351.wav + + ./zipformer/onnx_pretrained.py \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.fp16.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.fp16.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.fp16.onnx \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/DEV_T0000000000.wav \ + $repo/test_wavs/DEV_T0000000001.wav \ + $repo/test_wavs/DEV_T0000000002.wav \ + $repo/test_wavs/TEST_MEETING_T0000000113.wav \ + $repo/test_wavs/TEST_MEETING_T0000000219.wav \ + $repo/test_wavs/TEST_MEETING_T0000000351.wav + rm -rf $repo } @@ -91,7 +118,8 @@ function run_2023_11_05_streaming() { --use-averaged-model 0 \ --chunk-size 16 \ --left-context-frames 128 \ - --use-ctc 1 + --use-ctc 1 \ + --fp16 1 ls -lh $repo/exp/ @@ -109,36 +137,65 @@ function run_2023_11_05_streaming() { ) for w in ${test_wavs[@]}; do + log "----fp32----" + ./zipformer/onnx_pretrained-streaming-ctc.py \ + --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/$w + + log "----int8----" + ./zipformer/onnx_pretrained-streaming-ctc.py \ --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \ --tokens $repo/data/lang_bpe_2000/tokens.txt \ $repo/test_wavs/$w + + log "----fp16----" + + ./zipformer/onnx_pretrained-streaming-ctc.py \ + --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/$w done log "Upload onnx CTC models to huggingface" - url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 - GIT_LFS_SKIP_SMUDGE=1 git clone $url - dst=$(basename $url) - cp -v $repo/exp/ctc*.onnx $dst - cp -v $repo/data/lang_bpe_2000/tokens.txt $dst - cp -v $repo/data/lang_bpe_2000/bpe.model $dst - mkdir -p $dst/test_wavs - cp -v $repo/test_wavs/*.wav $dst/test_wavs - cd $dst - git lfs track "*.onnx" "bpe.model" - ls -lh - file bpe.model - git status - git add . 
- git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true + name=( + sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 + sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 + sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 + ) + for n in ${name[@]}; do + url=https://huggingface.co/k2-fsa/$n + GIT_LFS_SKIP_SMUDGE=1 git clone $url + dst=$(basename $url) + if [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]]; then + cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx $dst + elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 ]]; then + cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst + elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 ]]; then + cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst + fi - log "Upload models to https://github.com/k2-fsa/sherpa-onnx" - rm -rf .git - rm -fv .gitattributes - cd .. - tar cjfv $dst.tar.bz2 $dst - ls -lh *.tar.bz2 - mv -v $dst.tar.bz2 ../../../ + cp -v $repo/data/lang_bpe_2000/tokens.txt $dst + cp -v $repo/data/lang_bpe_2000/bpe.model $dst + mkdir -p $dst/test_wavs + cp -v $repo/test_wavs/*.wav $dst/test_wavs + cd $dst + git lfs track "*.onnx" "bpe.model" "*.wav" + ls -lh + file bpe.model + git status + git add . + git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true + + log "Upload models to https://github.com/k2-fsa/sherpa-onnx" + rm -rf .git + rm -fv .gitattributes + cd .. + tar cjfv $dst.tar.bz2 $dst + ls -lh *.tar.bz2 + mv -v $dst.tar.bz2 ../../../ + done log "----------------------------------------" log "Export streaming ONNX transducer models " @@ -153,7 +210,8 @@ function run_2023_11_05_streaming() { --use-averaged-model 0 \ --chunk-size 16 \ --left-context-frames 128 \ - --use-ctc 0 + --use-ctc 0 \ + --fp16 1 ls -lh $repo/exp @@ -176,6 +234,59 @@ function run_2023_11_05_streaming() { --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \ --tokens $repo/data/lang_bpe_2000/tokens.txt \ $repo/test_wavs/DEV_T0000000000.wav + + log "test fp16" + ./zipformer/onnx_pretrained-streaming.py \ + --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/DEV_T0000000000.wav + + name=( + sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 + sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 + sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 + ) + + for n in ${name[@]}; do + url=https://huggingface.co/csukuangfj/$n + GIT_LFS_SKIP_SMUDGE=1 git clone $url + dst=$(basename $url) + if [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 ]]; then + cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst + cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst + cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx $dst + elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 ]]; then + cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst + cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst + cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst + elif 
[[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 ]]; then + cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst + cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst + cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst + fi + + cp -v $repo/data/lang_bpe_2000/tokens.txt $dst + cp -v $repo/data/lang_bpe_2000/bpe.model $dst + mkdir -p $dst/test_wavs + cp -v $repo/test_wavs/*.wav $dst/test_wavs + cd $dst + git lfs track "*.onnx" "bpe.model" "*.wav" + ls -lh + file bpe.model + git status + git add . + git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true + + log "Upload models to https://github.com/k2-fsa/sherpa-onnx" + rm -rf .git + rm -fv .gitattributes + cd .. + tar cjfv $dst.tar.bz2 $dst + ls -lh *.tar.bz2 + mv -v $dst.tar.bz2 ../../../ + done } function run_2023_12_12_streaming() { @@ -192,7 +303,7 @@ function run_2023_12_12_streaming() { mkdir -p $dst/test_wavs cp -v $repo/test_wavs/*.wav $dst/test_wavs cd $dst - git lfs track "*.onnx" bpe.model + git lfs track "*.onnx" bpe.model "*.wav" git add . git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true @@ -205,6 +316,441 @@ function run_2023_12_12_streaming() { mv -v $dst.tar.bz2 ../../../ } -run_2023_9_2 +function run_yuekai_large() { + repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + pushd $repo + git lfs pull --include pretrained.pt + mv pretrained.pt epoch-99.pt + curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt + popd + + log "----------------------------------------" + log "Export streaming ONNX CTC models " + log "----------------------------------------" + ./zipformer/export-onnx-streaming-ctc.py \ + --exp-dir $repo/ \ + --tokens $repo/tokens.txt \ + --causal 1 \ + --avg 1 \ + --epoch 99 \ + --use-averaged-model 0 \ + --chunk-size 16 \ + --left-context-frames 128 \ + --use-ctc 1 \ + \ + --num-encoder-layers 2,2,4,5,4,2 \ + --feedforward-dim 768,1024,1536,2048,1536,768 \ + --encoder-dim 256,384,512,768,512,256 \ + --encoder-unmasked-dim 192,192,256,320,256,192 \ + \ + --fp16 1 \ + --use-whisper-features 1 + + + ls -lh $repo/ + pushd $repo + +cat >README.md <README.md < 2GB + external_filename = Path(filename).stem + + onnx.save( + model, + filename, + save_as_external_data=True, + all_tensors_to_one_file=True, + location=external_filename + ".weights", + ) + else: + onnx.save(model, filename) + + +def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path): + import onnxmltools + from onnxmltools.utils.float16_converter import convert_float_to_float16 + + onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path) + onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True) + onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) + + +def export_onnx_fp16_large_2gb(onnx_fp32_path, onnx_fp16_path): + import onnxmltools + from onnxmltools.utils.float16_converter import convert_float_to_float16_model_path + + onnx_fp16_model = convert_float_to_float16_model_path( + onnx_fp32_path, keep_io_types=True + ) + onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) class OnnxModel(nn.Module): @@ -278,6 +327,7 @@ def export_streaming_ctc_model_onnx( encoder_filename: str, 
opset_version: int = 11, use_whisper_features: bool = False, + use_external_data: bool = False, ) -> None: model.encoder.__class__.forward = model.encoder.__class__.streaming_forward @@ -423,7 +473,11 @@ def export_streaming_ctc_model_onnx( }, ) - add_meta_data(filename=encoder_filename, meta_data=meta_data) + add_meta_data( + filename=encoder_filename, + meta_data=meta_data, + use_external_data=use_external_data, + ) @torch.no_grad() @@ -554,12 +608,18 @@ def main(): opset_version = 13 logging.info("Exporting model") - model_filename = params.exp_dir / f"ctc-{suffix}.onnx" + + if params.use_external_data: + model_filename = f"ctc-{suffix}.onnx" + else: + model_filename = params.exp_dir / f"ctc-{suffix}.onnx" + export_streaming_ctc_model_onnx( model, - model_filename, + str(model_filename), opset_version=opset_version, use_whisper_features=params.use_whisper_features, + use_external_data=params.use_external_data, ) logging.info(f"Exported model to {model_filename}") @@ -568,7 +628,11 @@ def main(): logging.info("Generate int8 quantization models") - model_filename_int8 = params.exp_dir / f"ctc-{suffix}.int8.onnx" + if params.use_external_data: + model_filename_int8 = f"ctc-{suffix}.int8.onnx" + else: + model_filename_int8 = params.exp_dir / f"ctc-{suffix}.int8.onnx" + quantize_dynamic( model_input=model_filename, model_output=model_filename_int8, @@ -576,6 +640,14 @@ def main(): weight_type=QuantType.QInt8, ) + if params.fp16: + if params.use_external_data: + model_filename_fp16 = f"ctc-{suffix}.fp16.onnx" + export_onnx_fp16_large_2gb(model_filename, model_filename_fp16) + else: + model_filename_fp16 = params.exp_dir / f"ctc-{suffix}.fp16.onnx" + export_onnx_fp16(model_filename, model_filename_fp16) + if __name__ == "__main__": formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s" diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index 780bc3c45..29541570b 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -169,12 +169,40 @@ def get_parser(): help="True to use whisper features. Must match the one used in training", ) + parser.add_argument( + "--use-external-data", + type=str2bool, + default=False, + help="Set it to true for model file size > 2GB", + ) + add_model_arguments(parser) return parser -def add_meta_data(filename: str, meta_data: Dict[str, str]): +def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path): + import onnxmltools + from onnxmltools.utils.float16_converter import convert_float_to_float16 + + onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path) + onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True) + onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) + + +def export_onnx_fp16_large_2gb(onnx_fp32_path, onnx_fp16_path): + import onnxmltools + from onnxmltools.utils.float16_converter import convert_float_to_float16_model_path + + onnx_fp16_model = convert_float_to_float16_model_path( + onnx_fp32_path, keep_io_types=True + ) + onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) + + +def add_meta_data( + filename: str, meta_data: Dict[str, str], use_external_data: bool = False +): """Add meta data to an ONNX model. It is changed in-place. 
Args: @@ -189,7 +217,19 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]): meta.key = key meta.value = value - onnx.save(model, filename) + if use_external_data: + # For models file size > 2GB + external_filename = Path(filename).stem + + onnx.save( + model, + filename, + save_as_external_data=True, + all_tensors_to_one_file=True, + location=external_filename + ".weights", + ) + else: + onnx.save(model, filename) class OnnxEncoder(nn.Module): @@ -350,6 +390,7 @@ def export_encoder_model_onnx( opset_version: int = 11, feature_dim: int = 80, use_whisper_features: bool = False, + use_external_data: bool = False, ) -> None: encoder_model.encoder.__class__.forward = ( encoder_model.encoder.__class__.streaming_forward @@ -496,7 +537,11 @@ def export_encoder_model_onnx( }, ) - add_meta_data(filename=encoder_filename, meta_data=meta_data) + add_meta_data( + filename=encoder_filename, + meta_data=meta_data, + use_external_data=use_external_data, + ) def export_decoder_model_onnx( @@ -739,13 +784,17 @@ def main(): opset_version = 13 logging.info("Exporting encoder") - encoder_filename = params.exp_dir / f"encoder-{suffix}.onnx" + if params.use_external_data: + encoder_filename = f"encoder-{suffix}.onnx" + else: + encoder_filename = params.exp_dir / f"encoder-{suffix}.onnx" export_encoder_model_onnx( encoder, - encoder_filename, + str(encoder_filename), opset_version=opset_version, feature_dim=params.feature_dim, use_whisper_features=params.use_whisper_features, + use_external_data=params.use_external_data, ) logging.info(f"Exported encoder to {encoder_filename}") @@ -768,31 +817,31 @@ def main(): logging.info(f"Exported joiner to {joiner_filename}") if params.fp16: - from onnxconverter_common import float16 - logging.info("Generate fp16 models") - encoder = onnx.load(encoder_filename) - encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True) - encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx" - onnx.save(encoder_fp16, encoder_filename_fp16) + if params.use_external_data: + encoder_filename_fp16 = f"encoder-{suffix}.fp16.onnx" + export_onnx_fp16_large_2gb(encoder_filename, encoder_filename_fp16) + else: + encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx" + export_onnx_fp16(encoder_filename, encoder_filename_fp16) - decoder = onnx.load(decoder_filename) - decoder_fp16 = float16.convert_float_to_float16(decoder, keep_io_types=True) decoder_filename_fp16 = params.exp_dir / f"decoder-{suffix}.fp16.onnx" - onnx.save(decoder_fp16, decoder_filename_fp16) + export_onnx_fp16(decoder_filename, decoder_filename_fp16) - joiner = onnx.load(joiner_filename) - joiner_fp16 = float16.convert_float_to_float16(joiner, keep_io_types=True) joiner_filename_fp16 = params.exp_dir / f"joiner-{suffix}.fp16.onnx" - onnx.save(joiner_fp16, joiner_filename_fp16) + export_onnx_fp16(joiner_filename, joiner_filename_fp16) # Generate int8 quantization models # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection logging.info("Generate int8 quantization models") - encoder_filename_int8 = params.exp_dir / f"encoder-{suffix}.int8.onnx" + if params.use_external_data: + encoder_filename_int8 = f"encoder-{suffix}.int8.onnx" + else: + encoder_filename_int8 = params.exp_dir / f"encoder-{suffix}.int8.onnx" + quantize_dynamic( model_input=encoder_filename, model_output=encoder_filename_int8, diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py index a56a7a3e6..03c7d6f82 
100755 --- a/egs/librispeech/ASR/zipformer/export-onnx.py +++ b/egs/librispeech/ASR/zipformer/export-onnx.py @@ -70,7 +70,6 @@ import onnx import torch import torch.nn as nn from decoder import Decoder -from onnxconverter_common import float16 from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_model, get_params @@ -182,6 +181,15 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]): onnx.save(model, filename) +def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path): + import onnxmltools + from onnxmltools.utils.float16_converter import convert_float_to_float16 + + onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path) + onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True) + onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) + + class OnnxEncoder(nn.Module): """A wrapper for Zipformer and the encoder_proj from the joiner""" @@ -595,20 +603,14 @@ def main(): if params.fp16: logging.info("Generate fp16 models") - encoder = onnx.load(encoder_filename) - encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True) encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx" - onnx.save(encoder_fp16, encoder_filename_fp16) + export_onnx_fp16(encoder_filename, encoder_filename_fp16) - decoder = onnx.load(decoder_filename) - decoder_fp16 = float16.convert_float_to_float16(decoder, keep_io_types=True) decoder_filename_fp16 = params.exp_dir / f"decoder-{suffix}.fp16.onnx" - onnx.save(decoder_fp16, decoder_filename_fp16) + export_onnx_fp16(decoder_filename, decoder_filename_fp16) - joiner = onnx.load(joiner_filename) - joiner_fp16 = float16.convert_float_to_float16(joiner, keep_io_types=True) joiner_filename_fp16 = params.exp_dir / f"joiner-{suffix}.fp16.onnx" - onnx.save(joiner_fp16, joiner_filename_fp16) + export_onnx_fp16(joiner_filename, joiner_filename_fp16) # Generate int8 quantization models # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
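
Note on the fp16 path: the exporters now go through onnxmltools instead of onnxconverter_common, and for files above 2 GB they switch to convert_float_to_float16_model_path, which onnxmltools provides for large models. Below is a minimal standalone sketch of the regular conversion plus a session-creation smoke test; the file names are placeholders.

```python
# Minimal sketch of the fp16 conversion used by export-onnx*.py.
# File names are placeholders; substitute your exported models.
import onnxmltools
import onnxruntime as ort
from onnxmltools.utils.float16_converter import convert_float_to_float16

fp32_path = "encoder-epoch-99-avg-1.onnx"       # placeholder
fp16_path = "encoder-epoch-99-avg-1.fp16.onnx"  # placeholder

# keep_io_types=True leaves graph inputs/outputs in float32, so the
# onnx_pretrained*.py test scripts can feed the fp16 model unchanged.
model = onnxmltools.utils.load_model(fp32_path)
model_fp16 = convert_float_to_float16(model, keep_io_types=True)
onnxmltools.utils.save_model(model_fp16, fp16_path)

# Creating an InferenceSession validates the converted graph.
for path in (fp32_path, fp16_path):
    sess = ort.InferenceSession(path, providers=["CPUExecutionProvider"])
    print(path, "inputs:", [i.name for i in sess.get_inputs()])
```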
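
To see how much precision the conversion costs, one can compare fp32 and fp16 outputs directly. The decoder is the easiest model to probe because it takes a single int64 input; the input name "y" and context size 2 below are assumptions based on the zipformer exporters, so check sess.get_inputs() first.

```python
# Rough numerical check of an fp16 decoder against its fp32 source.
# The input name "y", shape (N, 2), and vocab bound 2000 are assumptions.
import numpy as np
import onnxruntime as ort

sess32 = ort.InferenceSession(
    "decoder-epoch-99-avg-1.onnx", providers=["CPUExecutionProvider"]
)
sess16 = ort.InferenceSession(
    "decoder-epoch-99-avg-1.fp16.onnx", providers=["CPUExecutionProvider"]
)

y = np.random.randint(0, 2000, size=(4, 2), dtype=np.int64)  # (N, context_size)
(out32,) = sess32.run(None, {"y": y})
(out16,) = sess16.run(None, {"y": y})
print("max abs diff:", np.abs(out32 - out16).max())
```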
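
--use-external-data exists because a single ONNX protobuf cannot exceed 2 GB; add_meta_data() then rewrites the model with its initializers in a side file. A sketch of that save path, assuming a placeholder model name:

```python
# Sketch of the external-data save used when --use-external-data=1.
from pathlib import Path

import onnx

filename = "ctc-epoch-99-avg-1-chunk-16-left-128.onnx"  # placeholder
model = onnx.load(filename)

onnx.save(
    model,
    filename,
    save_as_external_data=True,
    all_tensors_to_one_file=True,
    # Weights land next to the .onnx file, e.g. "ctc-...-128.weights";
    # onnx.load() resolves the location relative to the model file, so
    # the two files must be shipped together.
    location=Path(filename).stem + ".weights",
)
```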
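
The .int8.onnx models exercised above come from onnxruntime's post-training dynamic quantization; no calibration data is required. Standalone form of the call, with placeholder file names:

```python
# Dynamic int8 weight quantization, matching the quantize_dynamic()
# calls in the exporters. File names are placeholders.
from onnxruntime.quantization import QuantType, quantize_dynamic

quantize_dynamic(
    model_input="encoder-epoch-20-avg-1-chunk-16-left-128.onnx",
    model_output="encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx",
    weight_type=QuantType.QInt8,  # weight type used throughout this patch
)
```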
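
add_meta_data() stores the exporter's key/value pairs in the model's metadata_props, which is where runtimes such as sherpa-onnx look up model properties. To inspect what a given export wrote (model name is a placeholder):

```python
# Print the metadata embedded in an exported model.
import onnx

model = onnx.load("encoder-epoch-20-avg-1-chunk-16-left-128.onnx")  # placeholder
for prop in model.metadata_props:
    print(f"{prop.key} = {prop.value}")
```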