Upload gigaspeech zipformer models in CI (#1412)

This commit is contained in:
Fangjun Kuang 2023-12-12 18:57:04 +08:00 committed by GitHub
parent 20a82c9abf
commit 9e9fe7954d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 85 additions and 11 deletions

View File

@ -45,7 +45,7 @@ log "----------------------------------------"
ls -lh $repo/exp ls -lh $repo/exp
log "------------------------------------------------------------" log "------------------------------------------------------------"
log "Test export streaming ONNX transducer models (Python code) " log "Test exported streaming ONNX transducer models (Python code)"
log "------------------------------------------------------------" log "------------------------------------------------------------"
log "test fp32" log "test fp32"
@ -73,6 +73,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url) dst=$(basename $url)
cp -v $repo/exp/*.onnx $dst cp -v $repo/exp/*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst cd $dst

View File

@ -26,16 +26,80 @@ git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "data/lang_bpe_500/tokens.txt" git lfs pull --include "data/lang_bpe_500/tokens.txt"
git lfs pull --include "exp/jit_script.pt" git lfs pull --include "exp/jit_script.pt"
git lfs pull --include "exp/pretrained.pt" git lfs pull --include "exp/pretrained.pt"
ln -s pretrained.pt epoch-99.pt rm epoch-30.pt
ls -lh *.pt ln -s pretrained.pt epoch-30.pt
rm *.onnx
ls -lh
popd popd
# ---------------------------------------------------------------------------
# Export the checkpoint to ONNX, smoke-test the exported models, then publish
# them. `$repo` and `log` are defined earlier in this script (outside this
# section) -- presumably the cloned pretrained-model directory and a logging
# helper; confirm against the top of the file.
# ---------------------------------------------------------------------------

# Step 1: export. Uses --epoch 30 --avg 1 with averaged-model disabled; the
# steps below read the resulting *-epoch-30-avg-1*.onnx files from $repo/exp.
log "----------------------------------------"
log "Export ONNX transducer models "
log "----------------------------------------"
./zipformer/export-onnx.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--use-averaged-model 0 \
--epoch 30 \
--avg 1 \
--exp-dir $repo/exp
# List the export directory so the generated files show up in the CI log.
ls -lh $repo/exp
# Step 2: decode three test wavs with the exported models via the Python
# ONNX runner, once per precision variant.
log "------------------------------------------------------------"
log "Test exported ONNX transducer models (Python code) "
log "------------------------------------------------------------"
log "test fp32"
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
# Note: the int8 run pairs the int8 encoder and joiner with the non-int8
# decoder file.
log "test int8"
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.int8.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.int8.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
# Step 3: publish to Hugging Face. HF_TOKEN must be present in the
# environment; it is embedded in the push URL below.
log "Upload models to huggingface"
git config --global user.name "k2-fsa"
git config --global user.email "xxx@gmail.com"
url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
# GIT_LFS_SKIP_SMUDGE=1 clones without downloading existing LFS objects.
GIT_LFS_SKIP_SMUDGE=1 git clone $url
# dst is the last path component of the URL, i.e. the clone directory name.
dst=$(basename $url)
# Stage everything the published model repo should contain: all exported
# .onnx files, the token table, the BPE model and the test wavs.
cp -v $repo/exp/*.onnx $dst
cp -v $repo/data/lang_bpe_500/tokens.txt $dst
cp -v $repo/data/lang_bpe_500/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx"
git add .
# `|| true` forces a zero exit status, so a failed commit (e.g. nothing to
# commit) or a failed push does not propagate as an error from this line.
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
# Step 4: build a release tarball from the same directory. Git metadata is
# removed first so the archive contains only the model files.
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh
# Move the archive three directories up -- presumably the repository root,
# where a later CI step picks up *.tar.bz2; TODO confirm the working directory.
mv -v $dst.tar.bz2 ../../../
log "Export to torchscript model" log "Export to torchscript model"
./zipformer/export.py \ ./zipformer/export.py \
--exp-dir $repo/exp \ --exp-dir $repo/exp \
--use-averaged-model false \ --use-averaged-model false \
--tokens $repo/data/lang_bpe_500/tokens.txt \ --tokens $repo/data/lang_bpe_500/tokens.txt \
--epoch 99 \ --epoch 30 \
--avg 1 \ --avg 1 \
--jit 1 --jit 1
@ -67,7 +131,7 @@ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p zipformer/exp mkdir -p zipformer/exp
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/ ln -s $PWD/$repo/data/lang_bpe_500 data/
ls -lh data ls -lh data
@ -83,7 +147,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
./zipformer/decode.py \ ./zipformer/decode.py \
--decoding-method $method \ --decoding-method $method \
--epoch 999 \ --epoch 30 \
--avg 1 \ --avg 1 \
--use-averaged-model 0 \ --use-averaged-model 0 \
--max-duration $max_duration \ --max-duration $max_duration \

View File

@ -2,11 +2,6 @@ name: run-multi-zh-hans
on: on:
push: push:
branches:
- master
- upload-ctc-model
pull_request:
branches: branches:
- master - master

View File

@ -21,6 +21,7 @@ on:
push: push:
branches: branches:
- master - master
pull_request: pull_request:
types: [labeled] types: [labeled]
@ -33,6 +34,8 @@ on:
# nightly build at 15:50 UTC time every day # nightly build at 15:50 UTC time every day
- cron: "50 15 * * *" - cron: "50 15 * * *"
workflow_dispatch:
concurrency: concurrency:
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }} group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
cancel-in-progress: true cancel-in-progress: true
@ -85,6 +88,7 @@ jobs:
env: env:
GITHUB_EVENT_NAME: ${{ github.event_name }} GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: | run: |
mkdir -p egs/gigaspeech/ASR/data mkdir -p egs/gigaspeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank
@ -97,6 +101,16 @@ jobs:
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
# Publish every file matching ./*.tar.bz2 as a release asset of
# k2-fsa/sherpa-onnx under the "asr-models" tag. file_glob: true makes `file`
# a glob pattern; overwrite: true replaces an existing asset of the same name.
# NOTE(review): no `if:` condition is visible on this step, so it presumably
# runs on every trigger of the workflow -- confirm that is intended.
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
file: ./*.tar.bz2
overwrite: true
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: asr-models
- name: Display decoding results for gigaspeech zipformer - name: Display decoding results for gigaspeech zipformer
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
shell: bash shell: bash