minor fixes

This commit is contained in:
Fangjun Kuang 2025-03-03 14:27:42 +08:00
parent 7d6075b8e0
commit 0e749e4eb0
5 changed files with 329 additions and 144 deletions

View File

@ -14,8 +14,8 @@ cd egs/librispeech/ASR
# https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
# sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
function export_bilingual_zh_en() {
d=exp_zh_en
function export_2023_02_20() {
d=exp_2023_02_20
mkdir $d
pushd $d
@ -69,21 +69,20 @@ function export_bilingual_zh_en() {
--tokens $d/tokens.txt \
$d/1.wav
mkdir -p /icefall/rknn-models
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
mkdir -p $platform
dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
mkdir -p $dst
./pruned_transducer_stateless7_streaming/export_rknn.py \
--in-encoder $d/encoder-epoch-99-avg-1.onnx \
--in-decoder $d/decoder-epoch-99-avg-1.onnx \
--in-joiner $d/joiner-epoch-99-avg-1.onnx \
--out-encoder $platform/encoder.rknn \
--out-decoder $platform/decoder.rknn \
--out-joiner $platform/joiner.rknn \
--out-encoder $dst/encoder.rknn \
--out-decoder $dst/decoder.rknn \
--out-joiner $dst/joiner.rknn \
--target-platform $platform 2>/dev/null
ls -lh $platform/
ls -lh $dst/
./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
--encoder $d/encoder-epoch-99-avg-1.onnx \
@ -92,19 +91,24 @@ function export_bilingual_zh_en() {
--tokens $d/tokens.txt \
--wav $d/0.wav
cp $d/tokens.txt $platform
cp $d/*.wav $platform
cp $d/tokens.txt $dst
mkdir $dst/test_wavs
cp $d/*.wav $dst/test_wavs
cp -av $platform /icefall/rknn-models
tar cjvf $dst.tar.bz2 $dst
ls -lh $dst.tar.bz2
mv $dst.tar.bz2 /icefall/
ls -lh $dst/
echo "---"
rm -rf $dst
done
ls -lh /icefall/rknn-models
}
# https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
# sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
function export_bilingual_zh_en_small() {
d=exp_zh_en_small
function export_2023_02_16() {
d=exp_2023_02_16
mkdir $d
pushd $d
@ -161,21 +165,20 @@ function export_bilingual_zh_en_small() {
--tokens $d/tokens.txt \
$d/1.wav
mkdir -p /icefall/rknn-models-small
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
mkdir -p $platform
dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
mkdir -p $dst
./pruned_transducer_stateless7_streaming/export_rknn.py \
--in-encoder $d/encoder-epoch-99-avg-1.onnx \
--in-decoder $d/decoder-epoch-99-avg-1.onnx \
--in-joiner $d/joiner-epoch-99-avg-1.onnx \
--out-encoder $platform/encoder.rknn \
--out-decoder $platform/decoder.rknn \
--out-joiner $platform/joiner.rknn \
--out-encoder $dst/encoder.rknn \
--out-decoder $dst/decoder.rknn \
--out-joiner $dst/joiner.rknn \
--target-platform $platform 2>/dev/null
ls -lh $platform/
ls -lh $dst/
./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
--encoder $d/encoder-epoch-99-avg-1.onnx \
@ -184,15 +187,84 @@ function export_bilingual_zh_en_small() {
--tokens $d/tokens.txt \
--wav $d/0.wav
cp $d/tokens.txt $platform
cp $d/*.wav $platform
cp $d/tokens.txt $dst
mkdir $dst/test_wavs
cp $d/*.wav $dst/test_wavs
cp -av $platform /icefall/rknn-models-small
tar cjvf $dst.tar.bz2 $dst
ls -lh $dst.tar.bz2
mv $dst.tar.bz2 /icefall/
ls -lh $dst/
echo "---"
rm -rf $dst
done
ls -lh /icefall/rknn-models-small
}
export_bilingual_zh_en_small
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
function export_2023_06_26() {
  # Export the English streaming zipformer checkpoint hosted at
  # Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17 to ONNX,
  # convert it to RKNN once per Rockchip platform, and move one
  # sherpa-onnx-<platform>-streaming-zipformer-en-2023-06-26.tar.bz2 per
  # platform into /icefall/.
  #
  # Takes no arguments. Must be run from the recipe directory that contains
  # ./zipformer and ./pruned_transducer_stateless7_streaming.
  local d=exp_2023_06_26
  local repo=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main
  local platform dst

  mkdir "$d"
  pushd "$d"

  curl -SL -O "$repo/exp/pretrained.pt"
  # Renamed so the checkpoint name matches "--epoch 99" passed to the exporter.
  mv pretrained.pt epoch-99.pt

  # tokens.txt has to keep its own name: it is passed as --tokens below and
  # copied into every package. (It used to be written to 0.wav by mistake.)
  curl -SL -O "$repo/data/lang_bpe_500/tokens.txt"

  # NOTE(review): 0.wav assumes the usual icefall librispeech test_wavs
  # layout (1089-134686-0001.wav) - confirm the file exists in the repo.
  curl -SL -o 0.wav "$repo/test_wavs/1089-134686-0001.wav"
  curl -SL -o 1.wav "$repo/test_wavs/1221-135766-0001.wav"
  curl -SL -o 2.wav "$repo/test_wavs/1221-135766-0002.wav"

  ls -lh
  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  ls -lh "$d/"

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-en-2023-06-26
    mkdir -p "$dst"

    # Input names carry the -chunk-32-left-128 suffix, matching the other
    # recipes that run ./zipformer/export-onnx-streaming.py with the same
    # --chunk-size/--left-context-frames values.
    # NOTE(review): the other zipformer recipes convert with
    # ./zipformer/export_rknn_transducer_streaming.py; confirm that this
    # exporter is the intended one for this model.
    # stderr is discarded here, as in the other exports in this script.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform" 2>/dev/null

    ls -lh "$dst/"

    # Package layout: *.rknn and tokens.txt at the top, wavs in test_wavs/.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/

    ls -lh "$dst/"
    echo "---"

    # Only the archive in /icefall/ is kept.
    rm -rf "$dst"
  done
}
export_2023_02_16
export_2023_02_20
export_2023_06_26

73
.github/scripts/multi_zh-hans/ASR/run_rknn.sh vendored Executable file
View File

@ -0,0 +1,73 @@
#!/usr/bin/env bash
# -e: stop at the first command that fails; -x: echo each command as it runs.
set -ex
# Python packages installed before anything else runs.
# NOTE(review): presumably required by the export scripts invoked below - confirm.
python3 -m pip install kaldi-native-fbank soundfile librosa
log() {
  # Logger taken from espnet. Prints all arguments on one line, prefixed with
  # the current time and the caller's file name, line number and function:
  #   YYYY-mm-dd HH:MM:SS (file:line:function) message
  # Backslash escapes in the message are interpreted (echo -e).
  local caller_path="${BASH_SOURCE[1]}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  local now
  now="$(date '+%Y-%m-%d %H:%M:%S')"
  # Strip the directory part so only the file's base name is shown.
  echo -e "${now} (${caller_path##*/}:${caller_line}:${caller_func}) $*"
}
cd egs/multi_zh-hans/ASR
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese
function export_2023_11_05() {
  # Export the streaming multi-zh-hans zipformer checkpoint hosted at
  # zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05 to
  # ONNX, convert it to RKNN once per Rockchip platform, and move one
  # sherpa-onnx-<platform>-streaming-zipformer-multi-zh-hans-2023-12-12.tar.bz2
  # per platform into /icefall/.
  #
  # Takes no arguments. Must be run from the recipe directory that contains
  # ./zipformer.
  local d=exp
  local repo=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main
  local platform dst

  mkdir "$d"
  pushd "$d"

  curl -SL -O "$repo/data/lang_bpe_2000/tokens.txt"
  curl -SL -O "$repo/exp/pretrained.pt"
  # Renamed so the checkpoint name matches "--epoch 99" passed to the exporter.
  mv pretrained.pt epoch-99.pt

  # Three distinct test utterances. Previously DEV_T0000000000.wav was
  # downloaded three times, so every package shipped the same audio thrice.
  curl -SL -o 0.wav "$repo/test_wavs/DEV_T0000000000.wav"
  curl -SL -o 1.wav "$repo/test_wavs/DEV_T0000000001.wav"
  curl -SL -o 2.wav "$repo/test_wavs/DEV_T0000000002.wav"

  ls -lh
  popd

  # NOTE(review): the source repo name says "ctc", yet --use-ctc is 0 here -
  # confirm the checkpoint loads with this setting.
  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-multi-zh-hans-2023-12-12
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    # Package layout: *.rknn and tokens.txt at the top, wavs in test_wavs/.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/

    ls -lh "$dst/"
    echo "---"

    # Only the archive in /icefall/ is kept.
    rm -rf "$dst"
  done
}
export_2023_11_05

View File

@ -13,8 +13,8 @@ log() {
cd egs/wenetspeech/ASR
#https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-small-chinese
function export_zh_small() {
d=exp_zh_small
function export_2025_03_02() {
d=exp_2025_03_02
mkdir $d
pushd $d
curl -SL -O https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/data/lang_char/tokens.txt
@ -47,28 +47,150 @@ function export_zh_small() {
--left-context-frames 128 \
--causal 1
out=/icefall/rknn-models-small-wenetspeech
mkdir -p $out
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
mkdir -p $out/$platform
dst=sherpa-onnx-$platform-streaming-zipformer-small-zh-2025-03-02
mkdir -p $dst
./zipformer/export_rknn_transducer_streaming.py \
--in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
--in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
--in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
--out-encoder $out/$platform/encoder.rknn \
--out-decoder $out/$platform/decoder.rknn \
--out-joiner $out/$platform/joiner.rknn \
--out-encoder $dst/encoder.rknn \
--out-decoder $dst/decoder.rknn \
--out-joiner $dst/joiner.rknn \
--target-platform $platform
cp $d/tokens.txt $out/$platform
cp $d/*.wav $out/$platform
ls -lh $out/$platform/
cp $d/tokens.txt $dst
mkdir $dst/test_wavs
cp $d/*.wav $dst/test_wavs
tar cjvf $dst.tar.bz2 $dst
ls -lh $dst.tar.bz2
mv $dst.tar.bz2 /icefall/
ls -lh $dst/
echo "---"
rm -rf $dst
done
ls -h $out
echo "---"
ls -h $out/*
rm -rf $d
}
export_zh_small
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-large-chinese
function export_2025_03_03() {
  # Export a streaming wenetspeech zipformer checkpoint to ONNX, convert it
  # to RKNN once per Rockchip platform, and move one
  # sherpa-onnx-<platform>-streaming-zipformer-zh-2025-03-03.tar.bz2 per
  # platform into /icefall/. The working directory exp_2025_03_03 is removed
  # at the end.
  #
  # Takes no arguments. Must be run from the recipe directory that contains
  # ./zipformer.
  #
  # NOTE(review): this downloads from the same repository
  # (pkufool/...-wenetspeech-20230615) as export_2023_06_15, so both produce
  # packages from the same checkpoint under different names - confirm the
  # intended source for the 2025-03-03 model.
  local d=exp_2025_03_03
  local repo=https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main
  local platform dst

  mkdir "$d"
  pushd "$d"

  curl -SL -O "$repo/data/lang_char/tokens.txt"
  curl -SL -O "$repo/exp/pretrained.pt"
  # Renamed so the checkpoint name matches "--epoch 99" passed to the exporter.
  mv pretrained.pt epoch-99.pt

  curl -SL -o 0.wav "$repo/test_wavs/DEV_T0000000000.wav"
  curl -SL -o 1.wav "$repo/test_wavs/DEV_T0000000001.wav"
  curl -SL -o 2.wav "$repo/test_wavs/DEV_T0000000002.wav"

  ls -lh
  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-zh-2025-03-03
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    # Package layout: *.rknn and tokens.txt at the top, wavs in test_wavs/.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/

    ls -lh "$dst/"
    echo "---"

    # The archive now lives in /icefall/. Listing the old relative path here
    # would fail, and under "set -e" would stop after the first platform.
    ls -lh "/icefall/$dst.tar.bz2"

    rm -rf "$dst"
  done

  rm -rf "$d"
}
function export_2023_06_15() {
  # Export the streaming wenetspeech zipformer checkpoint hosted at
  # pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615 to ONNX,
  # convert it to RKNN once per Rockchip platform, and move one
  # sherpa-onnx-<platform>-streaming-zipformer-zh-2023-06-15.tar.bz2 per
  # platform into /icefall/.
  #
  # Takes no arguments. Must be run from the recipe directory that contains
  # ./zipformer. The working directory exp_2023_06_15 is left in place.
  local d=exp_2023_06_15
  local repo=https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main
  local platform dst

  mkdir "$d"
  pushd "$d"

  curl -SL -O "$repo/data/lang_char/tokens.txt"
  curl -SL -O "$repo/exp/pretrained.pt"
  # Renamed so the checkpoint name matches "--epoch 99" passed to the exporter.
  mv pretrained.pt epoch-99.pt

  curl -SL -o 0.wav "$repo/test_wavs/DEV_T0000000000.wav"
  curl -SL -o 1.wav "$repo/test_wavs/DEV_T0000000001.wav"
  curl -SL -o 2.wav "$repo/test_wavs/DEV_T0000000002.wav"

  ls -lh
  popd

  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-zh-2023-06-15
    mkdir -p "$dst"

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-decoder "$d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --in-joiner "$d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"

    # Package layout: *.rknn and tokens.txt at the top, wavs in test_wavs/.
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"

    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/

    ls -lh "$dst/"
    echo "---"

    # The archive now lives in /icefall/. Listing the old relative path here
    # would fail, and under "set -e" would stop after the first platform.
    ls -lh "/icefall/$dst.tar.bz2"

    rm -rf "$dst"
  done
}
export_2025_03_02
export_2025_03_03
export_2023_06_15

View File

@ -17,43 +17,25 @@ concurrency:
cancel-in-progress: true
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py --torch-version=2.0.0 --python-version=3.10
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version=2.0.0 --python-version=3.10)
echo "::set-output name=matrix::${MATRIX}"
rknn:
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
name: RKNN ${{ matrix.recipe }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
python-version: ["3.10"]
k2-version: ["1.24.4.dev20241029"]
kaldifeat-version: ["1.25.5.dev20241029"]
torch-version: ["2.0.0"]
torchaudio-version: ["2.0.1"]
version: ["20241218"]
recipe: ["librispeech", "wenetspeech", "multi_zh-hans"]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python
if: false
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Export RKNN model
uses: addnab/docker-run-action@v3
with:
@ -74,7 +56,6 @@ jobs:
python3 -m k2.version
pip list
# Install rknn
curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install ./*.whl "numpy<=1.26.4"
@ -83,77 +64,13 @@ jobs:
pip list
echo "---"
.github/scripts/wenetspeech/ASR/run_rknn.sh >log-wenetspeech.txt
# .github/scripts/librispeech/ASR/run_rknn.sh >log-librispeech.txt
recipe=${{ matrix.recipe }}
.github/scripts/$recipe/ASR/run_rknn.sh > log-$recipe.txt 2>&1 || true
- name: Display rknn models (librispeech)
shell: bash
if: false
run: |
ls -lh
ls -lh rknn-models/*
echo "----"
ls -lh rknn-models-small/*
- name: Display rknn models (wenetspeech)
shell: bash
run: |
ls -lh rknn-models-small-wenetspeech/*
- name: Collect results (small wenetspeech)
shell: bash
run: |
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-small-chinese
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
dst=sherpa-onnx-$platform-streaming-zipformer-small-zh-2025-03-02
mkdir $dst
mkdir $dst/test_wavs
src=rknn-models-small-wenetspeech/$platform
cp -v $src/*.rknn $dst/
cp -v $src/tokens.txt $dst/
cp -v $src/*.wav $dst/test_wavs/
ls -lh $dst
tar cjfv $dst.tar.bz2 $dst
rm -rf $dst
done
- name: Collect results (small librispeech)
if: false
shell: bash
run: |
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
mkdir $dst
mkdir $dst/test_wavs
src=rknn-models-small/$platform
cp -v $src/*.rknn $dst/
cp -v $src/tokens.txt $dst/
cp -v $src/*.wav $dst/test_wavs/
ls -lh $dst
tar cjfv $dst.tar.bz2 $dst
rm -rf $dst
done
- name: Collect results (librispeech)
if: false
shell: bash
run: |
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
mkdir $dst
mkdir $dst/test_wavs
src=rknn-models/$platform
cp -v $src/*.rknn $dst/
cp -v $src/tokens.txt $dst/
cp -v $src/*.wav $dst/test_wavs/
ls -lh $dst
tar cjfv $dst.tar.bz2 $dst
rm -rf $dst
done
- uses: actions/upload-artifact@v4
with:
name: log-${{ matrix.recipe }}
path: ./log-*.txt
- name: Display results
shell: bash
@ -171,7 +88,7 @@ jobs:
tag: asr-models
- name: Upload model to huggingface
if: github.event_name == 'push'
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3

View File

@ -0,0 +1 @@
../../../librispeech/ASR/zipformer/export_rknn_transducer_streaming.py