Add CI test for the AudioSet recipe. (#1585)

2025-12-08 21:45:27 +00:00 · 2024-04-09 17:45:00 +08:00 · 2024-04-09 17:45:00 +08:00 · fa5d861af0
commit fa5d861af0
parent f5d7818733
21 changed files with 360 additions and 114 deletions
--- a/.github/scripts/audioset/AT/run.sh
+++ b/.github/scripts/audioset/AT/run.sh
@ -0,0 +1,94 @@
+#!/usr/bin/env bash
+
+set -ex
+
+python3 -m pip install onnxoptimizer onnxsim
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+cd egs/audioset/AT
+
+function test_pretrained() {  # end-to-end check: pretrained.pt, JIT export, ONNX export
+  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
+  repo=$(basename $repo_url)
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  pushd $repo/exp
+  git lfs pull --include pretrained.pt
+  ln -s pretrained.pt epoch-99.pt
+  ls -lh
+  popd
+
+  log "test pretrained.pt"
+
+  python3 zipformer/pretrained.py \
+    --checkpoint $repo/exp/pretrained.pt \
+    --label-dict $repo/data/class_labels_indices.csv \
+    $repo/test_wavs/1.wav \
+    $repo/test_wavs/2.wav \
+    $repo/test_wavs/3.wav \
+    $repo/test_wavs/4.wav
+
+  log "test jit export"
+  ls -lh $repo/exp/
+  python3 zipformer/export.py \
+      --exp-dir $repo/exp \
+      --epoch 99 \
+      --avg 1 \
+      --use-averaged-model 0 \
+      --jit 1
+  ls -lh $repo/exp/
+
+  log "test jit models"
+  python3 zipformer/jit_pretrained.py \
+      --nn-model-filename $repo/exp/jit_script.pt \
+      --label-dict $repo/data/class_labels_indices.csv \
+      $repo/test_wavs/1.wav \
+      $repo/test_wavs/2.wav \
+      $repo/test_wavs/3.wav \
+      $repo/test_wavs/4.wav
+
+  log "test onnx export"
+  ls -lh $repo/exp/
+  python3 zipformer/export-onnx.py \
+      --exp-dir $repo/exp \
+      --epoch 99 \
+      --avg 1 \
+      --use-averaged-model 0
+
+  ls -lh $repo/exp/
+
+  pushd $repo/exp/
+  mv model-epoch-99-avg-1.onnx model.onnx
+  mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
+  popd
+
+  ls -lh $repo/exp/
+
+  log "test onnx models"
+  for m in model.onnx model.int8.onnx; do  # run both the fp32 and the int8 model
+    log "$m"
+    python3 zipformer/onnx_pretrained.py \
+        --model-filename $repo/exp/$m \
+        --label-dict $repo/data/class_labels_indices.csv \
+        $repo/test_wavs/1.wav \
+        $repo/test_wavs/2.wav \
+        $repo/test_wavs/3.wav \
+        $repo/test_wavs/4.wav
+  done
+
+  log "prepare data for uploading to huggingface"
+  dst=/icefall/model-onnx
+  mkdir -p $dst
+  cp -v $repo/exp/*.onnx $dst/
+  cp -v $repo/data/* $dst/
+  cp -av $repo/test_wavs $dst
+
+  ls -lh $dst
+  ls -lh $dst/test_wavs
+}
+
+test_pretrained
--- a/.github/scripts/docker/Dockerfile
+++ b/.github/scripts/docker/Dockerfile
@ -49,6 +49,8 @@ RUN pip install --no-cache-dir \
      multi_quantization \
      numba \
      numpy \
+      onnxoptimizer \
+      onnxsim \
      onnx \
      onnxmltools \
      onnxruntime \
--- a/.github/workflows/audioset.yml
+++ b/.github/workflows/audioset.yml
@ -0,0 +1,137 @@
+name: audioset
+
+on:
+  push:
+    branches:
+      - master
+
+  pull_request:
+    branches:
+      - master
+
+  workflow_dispatch:
+
+concurrency:
+  group: audioset-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # print the matrix once so it is visible in the job log (debugging aid)
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
+
+  audioset:
+    needs: generate_build_matrix
+    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Free space
+        shell: bash
+        run: |
+          ls -lh
+          df -h
+          rm -rf /opt/hostedtoolcache
+          df -h
+          echo "pwd: $PWD"
+          echo "github.workspace ${{ github.workspace }}"
+
+      - name: Run tests
+        uses: addnab/docker-run-action@v3
+        with:
+            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+            options: |
+              --volume ${{ github.workspace }}/:/icefall
+            shell: bash
+            run: |
+              export PYTHONPATH=/icefall:$PYTHONPATH
+              cd /icefall
+              git config --global --add safe.directory /icefall
+
+              .github/scripts/audioset/AT/run.sh
+
+      - name: Show model files
+        shell: bash
+        run: |
+          sudo chown -R runner ./model-onnx
+          ls -lh ./model-onnx
+          chmod -x ./model-onnx/class_labels_indices.csv
+
+          echo "----------"
+          ls -lh ./model-onnx/*
+
+      - name: Upload model to huggingface
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+
+            git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface
+            cd huggingface
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+            cp ../model-onnx/*.onnx ./
+            cp ../model-onnx/*.csv ./
+            cp -a ../model-onnx/test_wavs ./
+            ls -lh
+            git add .
+            git status
+            git commit -m "update models"
+            git status
+
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true
+            rm -rf huggingface
+
+      - name: Prepare for release
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
+        shell: bash
+        run: |
+          d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
+          mv ./model-onnx $d
+          tar cjvf ${d}.tar.bz2 $d
+          ls -lh
+
+      - name: Release exported onnx models
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: sherpa-onnx-*.tar.bz2
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: audio-tagging-models
+
--- a/docker/torch1.12.1-cuda11.3.dockerfile
+++ b/docker/torch1.12.1-cuda11.3.dockerfile
@ -55,6 +55,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch1.13.0-cuda11.6.dockerfile
+++ b/docker/torch1.13.0-cuda11.6.dockerfile
@ -55,6 +55,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch1.9.0-cuda10.2.dockerfile
+++ b/docker/torch1.9.0-cuda10.2.dockerfile
@ -69,6 +69,8 @@ RUN pip uninstall -y tqdm && \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.0.0-cuda11.7.dockerfile
+++ b/docker/torch2.0.0-cuda11.7.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.1.0-cuda11.8.dockerfile
+++ b/docker/torch2.1.0-cuda11.8.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.1.0-cuda12.1.dockerfile
+++ b/docker/torch2.1.0-cuda12.1.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.2.0-cuda11.8.dockerfile
+++ b/docker/torch2.2.0-cuda11.8.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.2.0-cuda12.1.dockerfile
+++ b/docker/torch2.2.0-cuda12.1.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.2.1-cuda11.8.dockerfile
+++ b/docker/torch2.2.1-cuda11.8.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.2.1-cuda12.1.dockerfile
+++ b/docker/torch2.2.1-cuda12.1.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.2.2-cuda11.8.dockerfile
+++ b/docker/torch2.2.2-cuda11.8.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/docker/torch2.2.2-cuda12.1.dockerfile
+++ b/docker/torch2.2.2-cuda12.1.dockerfile
@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
      onnx \
      onnxruntime \
      onnxmltools \
+      onnxoptimizer \
+      onnxsim \
      multi_quantization \
      typeguard \
      numpy \
--- a/egs/audioset/AT/zipformer/export-onnx.py
+++ b/egs/audioset/AT/zipformer/export-onnx.py
@ -6,56 +6,28 @@
 """
 This script exports a transducer model from PyTorch to ONNX.

-We use the pre-trained model from
-https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
-as an example to show how to use this file.
+Usage of this script:

-1. Download the pre-trained model
+  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
+  repo=$(basename $repo_url)
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  pushd $repo/exp
+  git lfs pull --include pretrained.pt
+  ln -s pretrained.pt epoch-99.pt
+  popd

-cd egs/librispeech/ASR
+  python3 zipformer/export-onnx.py \
+      --exp-dir $repo/exp \
+      --epoch 99 \
+      --avg 1 \
+      --use-averaged-model 0

-repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
+  pushd $repo/exp
+  mv model-epoch-99-avg-1.onnx model.onnx
+  mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
+  popd

-pushd $repo
-git lfs pull --include "exp/pretrained.pt"
-
-cd exp
-ln -s pretrained.pt epoch-99.pt
-popd
-
-2. Export the model to ONNX
-
-./zipformer/export-onnx.py \
-  --use-averaged-model 0 \
-  --epoch 99 \
-  --avg 1 \
-  --exp-dir $repo/exp \
-  --num-encoder-layers "2,2,3,4,3,2" \
-  --downsampling-factor "1,2,4,8,4,2" \
-  --feedforward-dim "512,768,1024,1536,1024,768" \
-  --num-heads "4,4,4,8,4,4" \
-  --encoder-dim "192,256,384,512,384,256" \
-  --query-head-dim 32 \
-  --value-head-dim 12 \
-  --pos-head-dim 4 \
-  --pos-dim 48 \
-  --encoder-unmasked-dim "192,192,256,256,256,192" \
-  --cnn-module-kernel "31,31,15,15,15,31" \
-  --decoder-dim 512 \
-  --joiner-dim 512 \
-  --causal False \
-  --chunk-size "16,32,64,-1" \
-  --left-context-frames "64,128,256,-1"
-
-It will generate the following 3 files inside $repo/exp:
-
-  - encoder-epoch-99-avg-1.onnx
-  - decoder-epoch-99-avg-1.onnx
-  - joiner-epoch-99-avg-1.onnx
-
-See ./onnx_pretrained.py and ./onnx_check.py for how to
+See ./onnx_pretrained.py
 use the exported ONNX models.
 """

@ -66,9 +38,11 @@ from typing import Dict

 import k2
 import onnx
+import onnxoptimizer
 import torch
 import torch.nn as nn
 from onnxruntime.quantization import QuantType, quantize_dynamic
+from onnxsim import simplify
 from scaling_converter import convert_scaled_to_non_scaled
 from train import add_model_arguments, get_model, get_params
 from zipformer import Zipformer2
@ -261,6 +235,29 @@ def export_audio_tagging_model_onnx(
    add_meta_data(filename=filename, meta_data=meta_data)


+def optimize_model(filename):
+    # Rewrite the ONNX file `filename` in place: run onnxoptimizer's
+    # "eliminate_unused_initializer" pass, then try onnxsim's simplify() and
+    # keep its result only when the returned check flag is true. Background:
+    # https://github.com/microsoft/onnxruntime/issues/1899#issuecomment-534806537
+    # https://github.com/onnx/onnx/issues/582#issuecomment-937788108
+    # https://github.com/onnx/optimizer/issues/110
+    # https://qiita.com/Yossy_Hal/items/34f3b2aef2199baf7f5f
+    # The (possibly simplified) model is saved back to the same path.
+    passes = ["eliminate_unused_initializer"]
+    onnx_model = onnx.load(filename)
+    onnx_model = onnxoptimizer.optimize(onnx_model, passes)
+
+    model_simp, check = simplify(onnx_model)
+    if check:
+        logging.info("Simplified the model!")
+        onnx_model = model_simp
+    else:
+        logging.info("Failed to simplify the model!")
+
+    onnx.save(onnx_model, filename)
+
+
@torch.no_grad()
 def main():
    args = get_parser().parse_args()
@ -389,6 +386,7 @@ def main():
        model_filename,
        opset_version=opset_version,
    )
+    optimize_model(model_filename)
    logging.info(f"Exported audio tagging model to {model_filename}")

    # Generate int8 quantization models
@ -403,6 +401,7 @@ def main():
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )
+    optimize_model(model_filename_int8)


 if __name__ == "__main__":
--- a/egs/audioset/AT/zipformer/export.py
+++ b/egs/audioset/AT/zipformer/export.py
@ -25,7 +25,7 @@

 Usage:

-Note: This is a example for librispeech dataset, if you are using different
+Note: This is an example for the AudioSet dataset; if you are using a different
 dataset, you should change the argument values according to your dataset.

 (1) Export to torchscript model using torch.jit.script()
@ -42,6 +42,7 @@ load it by `torch.jit.load("jit_script.pt")`.
 Check ./jit_pretrained.py for its usage.

 Check https://github.com/k2-fsa/sherpa
+and https://github.com/k2-fsa/sherpa-onnx
 for how to use the exported models outside of icefall.

 (2) Export `model.state_dict()`
@ -55,13 +56,13 @@ for how to use the exported models outside of icefall.
 It will generate a file `pretrained.pt` in the given `exp_dir`. You can later
 load it by `icefall.checkpoint.load_checkpoint()`.

-To use the generated file with `zipformer/decode.py`,
+To use the generated file with `zipformer/evaluate.py`,
 you can do:

    cd /path/to/exp_dir
    ln -s pretrained.pt epoch-9999.pt

-    cd /path/to/egs/librispeech/ASR
+    cd /path/to/egs/audioset/AT
    ./zipformer/evaluate.py \
        --exp-dir ./zipformer/exp \
        --use-averaged-model False \
--- a/egs/audioset/AT/zipformer/jit_pretrained.py
+++ b/egs/audioset/AT/zipformer/jit_pretrained.py
@ -28,10 +28,20 @@ You can use the following command to get the exported models:

 Usage of this script:

-./zipformer/jit_pretrained.py \
-  --nn-model-filename ./zipformer/exp/cpu_jit.pt \
-  /path/to/foo.wav \
-  /path/to/bar.wav
+  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
+  repo=$(basename $repo_url)
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  pushd $repo/exp
+  git lfs pull --include jit_script.pt
+  popd
+
+  python3 zipformer/jit_pretrained.py \
+      --nn-model-filename $repo/exp/jit_script.pt \
+      --label-dict $repo/data/class_labels_indices.csv \
+      $repo/test_wavs/1.wav \
+      $repo/test_wavs/2.wav \
+      $repo/test_wavs/3.wav \
+      $repo/test_wavs/4.wav
 """

 import argparse
@ -168,7 +178,8 @@ def main():
        topk_prob, topk_index = logit.sigmoid().topk(5)
        topk_labels = [label_dict[index.item()] for index in topk_index]
        logging.info(
-            f"{filename}: Top 5 predicted labels are {topk_labels} with probability of {topk_prob.tolist()}"
+            f"{filename}: Top 5 predicted labels are {topk_labels} with "
+            f"probability of {topk_prob.tolist()}"
        )

    logging.info("Done")
--- a/egs/audioset/AT/zipformer/onnx_pretrained.py
+++ b/egs/audioset/AT/zipformer/onnx_pretrained.py
@ -17,48 +17,25 @@
 # limitations under the License.
 """
 This script loads ONNX models and uses them to decode waves.
-You can use the following command to get the exported models:

-We use the pre-trained model from
-https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/
-as an example to show how to use this file.
+Usage of this script:

-1. Download the pre-trained model
+  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
+  repo=$(basename $repo_url)
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  pushd $repo/exp
+  git lfs pull --include "*.onnx"
+  popd

-cd egs/librispeech/ASR
-
-repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
-
-pushd $repo
-git lfs pull --include "exp/pretrained.pt"
-
-cd exp
-ln -s pretrained.pt epoch-99.pt
-popd
-
-2. Export the model to ONNX
-
-./zipformer/export-onnx.py \
-  --use-averaged-model 0 \
-  --epoch 99 \
-  --avg 1 \
-  --exp-dir $repo/exp \
-  --causal False
-
-It will generate the following 3 files inside $repo/exp:
-
-  - model-epoch-99-avg-1.onnx
-
-3. Run this file
-
-./zipformer/onnx_pretrained.py \
-  --model-filename $repo/exp/model-epoch-99-avg-1.onnx \
-  --tokens $repo/data/lang_bpe_500/tokens.txt \
-  $repo/test_wavs/1089-134686-0001.wav \
-  $repo/test_wavs/1221-135766-0001.wav \
-  $repo/test_wavs/1221-135766-0002.wav
+  for m in model.onnx model.int8.onnx; do
+    python3 zipformer/onnx_pretrained.py \
+        --model-filename $repo/exp/$m \
+        --label-dict $repo/data/class_labels_indices.csv \
+        $repo/test_wavs/1.wav \
+        $repo/test_wavs/2.wav \
+        $repo/test_wavs/3.wav \
+        $repo/test_wavs/4.wav
+  done
 """

 import argparse
--- a/egs/audioset/AT/zipformer/pretrained.py
+++ b/egs/audioset/AT/zipformer/pretrained.py
@ -18,27 +18,25 @@
 This script loads a checkpoint and uses it to decode waves.
 You can generate the checkpoint with the following command:

-Note: This is a example for librispeech dataset, if you are using different
+Note: This is an example for the AudioSet dataset; if you are using a different
 dataset, you should change the argument values according to your dataset.

-
-./zipformer/export.py \
-  --exp-dir ./zipformer/exp \
-  --tokens data/lang_bpe_500/tokens.txt \
-  --epoch 30 \
-  --avg 9
-
 Usage of this script:

-./zipformer/pretrained.py \
-  --checkpoint ./zipformer/exp/pretrained.pt \
-  /path/to/foo.wav \
-  /path/to/bar.wav
+  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
+  repo=$(basename $repo_url)
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  pushd $repo/exp
+  git lfs pull --include pretrained.pt
+  popd

-
-You can also use `./zipformer/exp/epoch-xx.pt`.
-
-Note: ./zipformer/exp/pretrained.pt is generated by ./zipformer/export.py
+  python3 zipformer/pretrained.py \
+    --checkpoint $repo/exp/pretrained.pt \
+    --label-dict $repo/data/class_labels_indices.csv \
+    $repo/test_wavs/1.wav \
+    $repo/test_wavs/2.wav \
+    $repo/test_wavs/3.wav \
+    $repo/test_wavs/4.wav
 """


@ -189,7 +187,8 @@ def main():
        topk_prob, topk_index = logit.sigmoid().topk(5)
        topk_labels = [label_dict[index.item()] for index in topk_index]
        logging.info(
-            f"{filename}: Top 5 predicted labels are {topk_labels} with probability of {topk_prob.tolist()}"
+            f"{filename}: Top 5 predicted labels are {topk_labels} with "
+            f"probability of {topk_prob.tolist()}"
        )

    logging.info("Done")
@ -199,4 +198,5 @@ if __name__ == "__main__":
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)
+
    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -8,13 +8,14 @@ pypinyin==0.50.0
 tensorboard
 typeguard
 dill
-onnx==1.15.0
-onnxruntime==1.16.3
+onnx>=1.15.0
+onnxruntime>=1.16.3
+onnxoptimizer

 # style check session:
 black==22.3.0
 isort==5.10.1
-flake8==5.0.4 
+flake8==5.0.4

 # cantonese word segment support
-pycantonese==3.4.0
+pycantonese==3.4.0