Add CI test for the AudioSet recipe. (#1585)
This commit is contained in:
parent f5d7818733
commit fa5d861af0
.github/scripts/audioset/AT/run.sh (vendored executable file, 94 lines added)

@@ -0,0 +1,94 @@
#!/usr/bin/env bash

set -ex

python3 -m pip install onnxoptimizer onnxsim

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/audioset/AT

function test_pretrained() {
  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
  repo=$(basename $repo_url)
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  pushd $repo/exp
  git lfs pull --include pretrained.pt
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd

  log "test pretrained.pt"

  python3 zipformer/pretrained.py \
    --checkpoint $repo/exp/pretrained.pt \
    --label-dict $repo/data/class_labels_indices.csv \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav \
    $repo/test_wavs/3.wav \
    $repo/test_wavs/4.wav

  log "test jit export"
  ls -lh $repo/exp/
  python3 zipformer/export.py \
    --exp-dir $repo/exp \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0 \
    --jit 1
  ls -lh $repo/exp/

  log "test jit models"
  python3 zipformer/jit_pretrained.py \
    --nn-model-filename $repo/exp/jit_script.pt \
    --label-dict $repo/data/class_labels_indices.csv \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav \
    $repo/test_wavs/3.wav \
    $repo/test_wavs/4.wav

  log "test onnx export"
  ls -lh $repo/exp/
  python3 zipformer/export-onnx.py \
    --exp-dir $repo/exp \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0

  ls -lh $repo/exp/

  pushd $repo/exp/
  mv model-epoch-99-avg-1.onnx model.onnx
  mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
  popd

  ls -lh $repo/exp/

  log "test onnx models"
  for m in model.onnx model.int8.onnx; do
    log "$m"
    python3 zipformer/onnx_pretrained.py \
      --model-filename $repo/exp/$m \
      --label-dict $repo/data/class_labels_indices.csv \
      $repo/test_wavs/1.wav \
      $repo/test_wavs/2.wav \
      $repo/test_wavs/3.wav \
      $repo/test_wavs/4.wav
  done

  log "prepare data for uploading to huggingface"
  dst=/icefall/model-onnx
  mkdir -p $dst
  cp -v $repo/exp/*.onnx $dst/
  cp -v $repo/data/* $dst/
  cp -av $repo/test_wavs $dst

  ls -lh $dst
  ls -lh $dst/test_wavs
}

test_pretrained
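The workflow further down runs this script inside a Docker container with the repository mounted at /icefall. For local debugging the same steps can be reproduced roughly as follows; a minimal sketch, assuming your icefall checkout really is at /icefall (the script copies its artifacts to /icefall/model-onnx), so adjust the path otherwise:

# Mirror of what the CI job does before calling the script (paths assumed).
export PYTHONPATH=/icefall:$PYTHONPATH
cd /icefall
.github/scripts/audioset/AT/run.sh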
.github/scripts/docker/Dockerfile (vendored, 2 lines added)

@@ -49,6 +49,8 @@ RUN pip install --no-cache-dir \
    multi_quantization \
    numba \
    numpy \
    onnxoptimizer \
    onnxsim \
    onnx \
    onnxmltools \
    onnxruntime \
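These packages end up in the prebuilt CI images that the audioset workflow below pulls from ghcr.io. A sketch of poking around one such image locally; the tag here is purely illustrative, real tags follow the cpu-py&lt;python&gt;-torch&lt;torch&gt;-v&lt;version&gt; pattern produced by the build matrix:

# Hypothetical tag; pick one that actually exists under ghcr.io/k2-fsa/icefall.
docker run --rm -it -v $PWD:/icefall ghcr.io/k2-fsa/icefall:cpu-py3.9-torch2.2.0-v1.1 bash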
.github/workflows/audioset.yml (vendored new file, 137 lines added)

@@ -0,0 +1,137 @@
name: audioset

on:
  push:
    branches:
      - master

  pull_request:
    branches:
      - master

  workflow_dispatch:

concurrency:
  group: audioset-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
          echo "::set-output name=matrix::${MATRIX}"

  audioset:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Free space
        shell: bash
        run: |
          ls -lh
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"

      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
          options: |
            --volume ${{ github.workspace }}/:/icefall
          shell: bash
          run: |
            export PYTHONPATH=/icefall:$PYTHONPATH
            cd /icefall
            git config --global --add safe.directory /icefall

            .github/scripts/audioset/AT/run.sh

      - name: Show model files
        shell: bash
        run: |
          sudo chown -R runner ./model-onnx
          ls -lh ./model-onnx
          chmod -x ./model-onnx/class_labels_indices.csv

          echo "----------"
          ls -lh ./model-onnx/*

      - name: Upload model to huggingface
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1

            git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp ../model-onnx/*.onnx ./
            cp ../model-onnx/*.csv ./
            cp -a ../model-onnx/test_wavs ./
            ls -lh
            git add .
            git status
            git commit -m "update models"
            git status

            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true
            rm -rf huggingface

      - name: Prepare for release
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
        shell: bash
        run: |
          d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
          mv ./model-onnx $d
          tar cjvf ${d}.tar.bz2 $d
          ls -lh

      - name: Release exported onnx models
        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: audio-tagging-models
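Once the release step has run, the packed models land as an asset on the audio-tagging-models release of k2-fsa/sherpa-onnx. A sketch of fetching them, assuming the standard GitHub release-asset URL layout (check the release page for the exact asset name):

# Download and unpack the exported audio-tagging models (asset name assumed from the step above).
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
tar xjvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
ls -lh sherpa-onnx-zipformer-audio-tagging-2024-04-09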
@@ -55,6 +55,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -55,6 +55,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -69,6 +69,8 @@ RUN pip uninstall -y tqdm && \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \

@@ -56,6 +56,8 @@ RUN pip install --no-cache-dir \
    onnx \
    onnxruntime \
    onnxmltools \
    onnxoptimizer \
    onnxsim \
    multi_quantization \
    typeguard \
    numpy \
@@ -6,56 +6,28 @@
"""
This script exports a transducer model from PyTorch to ONNX.

We use the pre-trained model from
https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
as an example to show how to use this file.
Usage of this script:

1. Download the pre-trained model
repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
repo=$(basename $repo_url)
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
pushd $repo/exp
git lfs pull --include pretrained.pt
ln -s pretrained.pt epoch-99.pt
popd

cd egs/librispeech/ASR
python3 zipformer/export-onnx.py \
  --exp-dir $repo/exp \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0

repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo/exp
mv model-epoch-99-avg-1.onnx model.onnx
mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
popd

pushd $repo
git lfs pull --include "exp/pretrained.pt"

cd exp
ln -s pretrained.pt epoch-99.pt
popd

2. Export the model to ONNX

./zipformer/export-onnx.py \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --num-encoder-layers "2,2,3,4,3,2" \
  --downsampling-factor "1,2,4,8,4,2" \
  --feedforward-dim "512,768,1024,1536,1024,768" \
  --num-heads "4,4,4,8,4,4" \
  --encoder-dim "192,256,384,512,384,256" \
  --query-head-dim 32 \
  --value-head-dim 12 \
  --pos-head-dim 4 \
  --pos-dim 48 \
  --encoder-unmasked-dim "192,192,256,256,256,192" \
  --cnn-module-kernel "31,31,15,15,15,31" \
  --decoder-dim 512 \
  --joiner-dim 512 \
  --causal False \
  --chunk-size "16,32,64,-1" \
  --left-context-frames "64,128,256,-1"

It will generate the following 3 files inside $repo/exp:

- encoder-epoch-99-avg-1.onnx
- decoder-epoch-99-avg-1.onnx
- joiner-epoch-99-avg-1.onnx

See ./onnx_pretrained.py and ./onnx_check.py for how to
See ./onnx_pretrained.py
use the exported ONNX models.
"""
@@ -66,9 +38,11 @@ from typing import Dict

import k2
import onnx
import onnxoptimizer
import torch
import torch.nn as nn
from onnxruntime.quantization import QuantType, quantize_dynamic
from onnxsim import simplify
from scaling_converter import convert_scaled_to_non_scaled
from train import add_model_arguments, get_model, get_params
from zipformer import Zipformer2
@@ -261,6 +235,29 @@ def export_audio_tagging_model_onnx(
    add_meta_data(filename=filename, meta_data=meta_data)


def optimize_model(filename):
    # see
    # https://github.com/microsoft/onnxruntime/issues/1899#issuecomment-534806537
    # and
    # https://github.com/onnx/onnx/issues/582#issuecomment-937788108
    # and
    # https://github.com/onnx/optimizer/issues/110
    # and
    # https://qiita.com/Yossy_Hal/items/34f3b2aef2199baf7f5f
    passes = ["eliminate_unused_initializer"]
    onnx_model = onnx.load(filename)
    onnx_model = onnxoptimizer.optimize(onnx_model, passes)

    model_simp, check = simplify(onnx_model)
    if check:
        logging.info("Simplified the model!")
        onnx_model = model_simp
    else:
        logging.info("Failed to simplify the model!")

    onnx.save(onnx_model, filename)


@torch.no_grad()
def main():
    args = get_parser().parse_args()
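optimize_model rewrites the exported file in place with onnxoptimizer and onnxsim before it is published. A quick, optional sanity check that the optimized file still loads under onnxruntime; a sketch, with the file name being an assumption:

# Fails loudly if the optimized graph is no longer valid for onnxruntime.
python3 -c "import onnxruntime as ort; ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider']); print('model.onnx loads fine')"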
@@ -389,6 +386,7 @@ def main():
        model_filename,
        opset_version=opset_version,
    )
    optimize_model(model_filename)
    logging.info(f"Exported audio tagging model to {model_filename}")

    # Generate int8 quantization models
@@ -403,6 +401,7 @@ def main():
        op_types_to_quantize=["MatMul"],
        weight_type=QuantType.QInt8,
    )
    optimize_model(model_filename_int8)


if __name__ == "__main__":
@@ -25,7 +25,7 @@

Usage:

Note: This is a example for librispeech dataset, if you are using different
Note: This is an example for AudioSet dataset, if you are using different
dataset, you should change the argument values according to your dataset.

(1) Export to torchscript model using torch.jit.script()
@@ -42,6 +42,7 @@ load it by `torch.jit.load("jit_script.pt")`.
Check ./jit_pretrained.py for its usage.

Check https://github.com/k2-fsa/sherpa
and https://github.com/k2-fsa/sherpa-onnx
for how to use the exported models outside of icefall.

(2) Export `model.state_dict()`
@@ -55,13 +56,13 @@ for how to use the exported models outside of icefall.
It will generate a file `pretrained.pt` in the given `exp_dir`. You can later
load it by `icefall.checkpoint.load_checkpoint()`.

To use the generated file with `zipformer/decode.py`,
To use the generated file with `zipformer/evaluate.py`,
you can do:

cd /path/to/exp_dir
ln -s pretrained.pt epoch-9999.pt

cd /path/to/egs/librispeech/ASR
cd /path/to/egs/audioset/AT
./zipformer/evaluate.py \
  --exp-dir ./zipformer/exp \
  --use-averaged-model False \
@@ -28,10 +28,20 @@ You can use the following command to get the exported models:

Usage of this script:

./zipformer/jit_pretrained.py \
  --nn-model-filename ./zipformer/exp/cpu_jit.pt \
  /path/to/foo.wav \
  /path/to/bar.wav
repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
repo=$(basename $repo_url)
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
pushd $repo/exp
git lfs pull --include jit_script.pt
popd

python3 zipformer/jit_pretrained.py \
  --nn-model-filename $repo/exp/jit_script.pt \
  --label-dict $repo/data/class_labels_indices.csv \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/2.wav \
  $repo/test_wavs/3.wav \
  $repo/test_wavs/4.wav
"""

import argparse
@@ -168,7 +178,8 @@ def main():
        topk_prob, topk_index = logit.sigmoid().topk(5)
        topk_labels = [label_dict[index.item()] for index in topk_index]
        logging.info(
            f"{filename}: Top 5 predicted labels are {topk_labels} with probability of {topk_prob.tolist()}"
            f"{filename}: Top 5 predicted labels are {topk_labels} with "
            f"probability of {topk_prob.tolist()}"
        )

    logging.info("Done")
@@ -17,48 +17,25 @@
# limitations under the License.
"""
This script loads ONNX models and uses them to decode waves.
You can use the following command to get the exported models:

We use the pre-trained model from
https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/
as an example to show how to use this file.
Usage of this script:

1. Download the pre-trained model
repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
repo=$(basename $repo_url)
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
pushd $repo/exp
git lfs pull --include "*.onnx"
popd

cd egs/librispeech/ASR

repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12#/
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)

pushd $repo
git lfs pull --include "exp/pretrained.pt"

cd exp
ln -s pretrained.pt epoch-99.pt
popd

2. Export the model to ONNX

./zipformer/export-onnx.py \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --causal False

It will generate the following 3 files inside $repo/exp:

- model-epoch-99-avg-1.onnx

3. Run this file

./zipformer/onnx_pretrained.py \
  --model-filename $repo/exp/model-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
for m in model.onnx model.int8.onnx; do
  python3 zipformer/onnx_pretrained.py \
    --model-filename $repo/exp/$m \
    --label-dict $repo/data/class_labels_indices.csv \
    $repo/test_wavs/1.wav \
    $repo/test_wavs/2.wav \
    $repo/test_wavs/3.wav \
    $repo/test_wavs/4.wav
done
"""

import argparse
@@ -18,27 +18,25 @@
This script loads a checkpoint and uses it to decode waves.
You can generate the checkpoint with the following command:

Note: This is a example for librispeech dataset, if you are using different
Note: This is an example for the AudioSet dataset, if you are using different
dataset, you should change the argument values according to your dataset.


./zipformer/export.py \
  --exp-dir ./zipformer/exp \
  --tokens data/lang_bpe_500/tokens.txt \
  --epoch 30 \
  --avg 9

Usage of this script:

./zipformer/pretrained.py \
  --checkpoint ./zipformer/exp/pretrained.pt \
  /path/to/foo.wav \
  /path/to/bar.wav
repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
repo=$(basename $repo_url)
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
pushd $repo/exp
git lfs pull --include pretrained.pt
popd


You can also use `./zipformer/exp/epoch-xx.pt`.

Note: ./zipformer/exp/pretrained.pt is generated by ./zipformer/export.py
python3 zipformer/pretrained.py \
  --checkpoint $repo/exp/pretrained.pt \
  --label-dict $repo/data/class_labels_indices.csv \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/2.wav \
  $repo/test_wavs/3.wav \
  $repo/test_wavs/4.wav
"""
@@ -189,7 +187,8 @@ def main():
        topk_prob, topk_index = logit.sigmoid().topk(5)
        topk_labels = [label_dict[index.item()] for index in topk_index]
        logging.info(
            f"{filename}: Top 5 predicted labels are {topk_labels} with probability of {topk_prob.tolist()}"
            f"{filename}: Top 5 predicted labels are {topk_labels} with "
            f"probability of {topk_prob.tolist()}"
        )

    logging.info("Done")

@@ -199,4 +198,5 @@
    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"

    logging.basicConfig(format=formatter, level=logging.INFO)

    main()
@@ -8,13 +8,14 @@ pypinyin==0.50.0
tensorboard
typeguard
dill
onnx==1.15.0
onnxruntime==1.16.3
onnx>=1.15.0
onnxruntime>=1.16.3
onnxoptimizer

# style check session:
black==22.3.0
isort==5.10.1
flake8==5.0.4

# cantonese word segment support
pycantonese==3.4.0
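To pick up the relaxed pins in an existing environment, something along these lines should work; note that onnxsim is not listed in this requirements file and is installed separately by the CI script above:

# Versions follow the requirements file above; quotes stop the shell from treating '>' as a redirection.
pip3 install "onnx>=1.15.0" "onnxruntime>=1.16.3" onnxoptimizer onnxsim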