From 683ae6c2cc4672ce4b70c7054dca0f4a0780269b Mon Sep 17 00:00:00 2001
From: manickavela29
Date: Thu, 27 Jun 2024 06:01:27 +0000
Subject: [PATCH] Extend fp16 export to export-onnx.py

Signed-off-by: manickavela29
---
 .../ASR/zipformer/export-onnx-streaming.py   |  9 +++----
 egs/librispeech/ASR/zipformer/export-onnx.py | 26 +++++++++++++------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
index b32609122..e5ceb3683 100755
--- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
+++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
@@ -755,12 +755,8 @@ def main():
     )
     logging.info(f"Exported joiner to {joiner_filename}")
 
-    # Generate int8 quantization models
-    # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
-
-
     if(params.fp16) :
-        logging.info("Exporting models in fp16")
+        logging.info("Generate fp16 models")
 
         encoder = onnx.load(encoder_filename)
         encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True)
@@ -777,6 +773,9 @@ def main():
         joiner_filename_fp16 = params.exp_dir / f"joiner-{suffix}.fp16.onnx"
         onnx.save(joiner_fp16,joiner_filename_fp16)
 
+    # Generate int8 quantization models
+    # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
+
     logging.info("Generate int8 quantization models")
 
     encoder_filename_int8 = params.exp_dir / f"encoder-{suffix}.int8.onnx"
diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py
index c924be613..ed8a0ef0f 100755
--- a/egs/librispeech/ASR/zipformer/export-onnx.py
+++ b/egs/librispeech/ASR/zipformer/export-onnx.py
@@ -282,7 +282,6 @@ def export_encoder_model_onnx(
     encoder_model: OnnxEncoder,
     encoder_filename: str,
     opset_version: int = 11,
-    fp16:bool = False,
 ) -> None:
     """Export the given encoder model to ONNX format.
     The exported model has two inputs:
@@ -334,12 +333,6 @@ def export_encoder_model_onnx(
 
     add_meta_data(filename=encoder_filename, meta_data=meta_data)
 
-    if(fp16) :
-        logging.info("Exporting Encoder model in fp16")
-        encoder = onnx.load(encoder_filename)
-        encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True)
-        onnx.save(encoder_fp16,encoder_filename)
-
 
 def export_decoder_model_onnx(
     decoder_model: OnnxDecoder,
@@ -578,7 +571,6 @@ def main():
         encoder,
         encoder_filename,
         opset_version=opset_version,
-        fp16=params.fp16,
     )
     logging.info(f"Exported encoder to {encoder_filename}")
 
@@ -600,6 +592,24 @@ def main():
     )
     logging.info(f"Exported joiner to {joiner_filename}")
 
+    if params.fp16:
+        logging.info("Generate fp16 models")
+
+        encoder = onnx.load(encoder_filename)
+        encoder_fp16 = float16.convert_float_to_float16(encoder, keep_io_types=True)
+        encoder_filename_fp16 = params.exp_dir / f"encoder-{suffix}.fp16.onnx"
+        onnx.save(encoder_fp16, encoder_filename_fp16)
+
+        decoder = onnx.load(decoder_filename)
+        decoder_fp16 = float16.convert_float_to_float16(decoder, keep_io_types=True)
+        decoder_filename_fp16 = params.exp_dir / f"decoder-{suffix}.fp16.onnx"
+        onnx.save(decoder_fp16, decoder_filename_fp16)
+
+        joiner = onnx.load(joiner_filename)
+        joiner_fp16 = float16.convert_float_to_float16(joiner, keep_io_types=True)
+        joiner_filename_fp16 = params.exp_dir / f"joiner-{suffix}.fp16.onnx"
+        onnx.save(joiner_fp16, joiner_filename_fp16)
+
     # Generate int8 quantization models
     # See https://onnxruntime.ai/docs/performance/model-optimizations/quantization.html#data-type-selection
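
Note: outside the export scripts, the fp16 path added by this patch reduces to the short standalone sketch below. It reuses the same onnx.load / float16.convert_float_to_float16 / onnx.save calls shown in the diff; the import location for float16 (the onnxconverter-common package) is assumed from those calls, and the file names are hypothetical placeholders rather than paths produced by this PR.

    # Minimal sketch of the fp16 conversion used above (assumed setup, hypothetical paths).
    import onnx
    from onnxconverter_common import float16  # assumed source of the float16 helper

    fp32_path = "encoder-epoch-99-avg-1.onnx"        # hypothetical: an already exported fp32 model
    fp16_path = "encoder-epoch-99-avg-1.fp16.onnx"   # hypothetical: where to write the fp16 copy

    model = onnx.load(fp32_path)
    # keep_io_types=True keeps the graph inputs/outputs in float32, so callers can
    # feed and read the same tensors as with the fp32 model.
    model_fp16 = float16.convert_float_to_float16(model, keep_io_types=True)
    onnx.save(model_fp16, fp16_path)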