mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-08 09:32:20 +00:00
Upload gigaspeech zipformer models in CI (#1412)
This commit is contained in:
parent
20a82c9abf
commit
9e9fe7954d
3
.github/scripts/multi-zh-hans.sh
vendored
3
.github/scripts/multi-zh-hans.sh
vendored
@ -45,7 +45,7 @@ log "----------------------------------------"
|
||||
ls -lh $repo/exp
|
||||
|
||||
log "------------------------------------------------------------"
|
||||
log "Test export streaming ONNX transducer models (Python code) "
|
||||
log "Test exported streaming ONNX transducer models (Python code)"
|
||||
log "------------------------------------------------------------"
|
||||
|
||||
log "test fp32"
|
||||
@ -73,6 +73,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
||||
dst=$(basename $url)
|
||||
cp -v $repo/exp/*.onnx $dst
|
||||
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
||||
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
||||
mkdir -p $dst/test_wavs
|
||||
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
||||
cd $dst
|
||||
|
@ -26,16 +26,80 @@ git lfs pull --include "data/lang_bpe_500/bpe.model"
|
||||
git lfs pull --include "data/lang_bpe_500/tokens.txt"
|
||||
git lfs pull --include "exp/jit_script.pt"
|
||||
git lfs pull --include "exp/pretrained.pt"
|
||||
ln -s pretrained.pt epoch-99.pt
|
||||
ls -lh *.pt
|
||||
rm epoch-30.pt
|
||||
ln -s pretrained.pt epoch-30.pt
|
||||
rm *.onnx
|
||||
ls -lh
|
||||
popd
|
||||
|
||||
log "----------------------------------------"
|
||||
log "Export ONNX transducer models "
|
||||
log "----------------------------------------"
|
||||
|
||||
./zipformer/export-onnx.py \
|
||||
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||
--use-averaged-model 0 \
|
||||
--epoch 30 \
|
||||
--avg 1 \
|
||||
--exp-dir $repo/exp
|
||||
|
||||
ls -lh $repo/exp
|
||||
|
||||
log "------------------------------------------------------------"
|
||||
log "Test exported ONNX transducer models (Python code) "
|
||||
log "------------------------------------------------------------"
|
||||
|
||||
log "test fp32"
|
||||
./zipformer/onnx_pretrained.py \
|
||||
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.onnx \
|
||||
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
|
||||
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.onnx \
|
||||
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||
$repo/test_wavs/1089-134686-0001.wav \
|
||||
$repo/test_wavs/1221-135766-0001.wav \
|
||||
$repo/test_wavs/1221-135766-0002.wav
|
||||
|
||||
log "test int8"
|
||||
./zipformer/onnx_pretrained.py \
|
||||
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.int8.onnx \
|
||||
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
|
||||
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.int8.onnx \
|
||||
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||
$repo/test_wavs/1089-134686-0001.wav \
|
||||
$repo/test_wavs/1221-135766-0001.wav \
|
||||
$repo/test_wavs/1221-135766-0002.wav
|
||||
|
||||
log "Upload models to huggingface"
|
||||
git config --global user.name "k2-fsa"
|
||||
git config --global user.email "xxx@gmail.com"
|
||||
|
||||
url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
||||
dst=$(basename $url)
|
||||
cp -v $repo/exp/*.onnx $dst
|
||||
cp -v $repo/data/lang_bpe_500/tokens.txt $dst
|
||||
cp -v $repo/data/lang_bpe_500/bpe.model $dst
|
||||
mkdir -p $dst/test_wavs
|
||||
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
||||
cd $dst
|
||||
git lfs track "*.onnx"
|
||||
git add .
|
||||
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
|
||||
|
||||
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
||||
rm -rf .git
|
||||
rm -fv .gitattributes
|
||||
cd ..
|
||||
tar cjfv $dst.tar.bz2 $dst
|
||||
ls -lh
|
||||
mv -v $dst.tar.bz2 ../../../
|
||||
|
||||
log "Export to torchscript model"
|
||||
./zipformer/export.py \
|
||||
--exp-dir $repo/exp \
|
||||
--use-averaged-model false \
|
||||
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||
--epoch 99 \
|
||||
--epoch 30 \
|
||||
--avg 1 \
|
||||
--jit 1
|
||||
|
||||
@ -67,7 +131,7 @@ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
||||
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||
mkdir -p zipformer/exp
|
||||
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt
|
||||
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
|
||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||
|
||||
ls -lh data
|
||||
@ -83,7 +147,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
|
||||
|
||||
./zipformer/decode.py \
|
||||
--decoding-method $method \
|
||||
--epoch 999 \
|
||||
--epoch 30 \
|
||||
--avg 1 \
|
||||
--use-averaged-model 0 \
|
||||
--max-duration $max_duration \
|
||||
|
5
.github/workflows/multi-zh-hans.yml
vendored
5
.github/workflows/multi-zh-hans.yml
vendored
@ -2,11 +2,6 @@ name: run-multi-zh-hans
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- upload-ctc-model
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
|
@ -21,6 +21,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
|
||||
@ -33,6 +34,8 @@ on:
|
||||
# nightly build at 15:50 UTC time every day
|
||||
- cron: "50 15 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@ -85,6 +88,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
run: |
|
||||
mkdir -p egs/gigaspeech/ASR/data
|
||||
ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank
|
||||
@ -97,6 +101,16 @@ jobs:
|
||||
|
||||
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
|
||||
|
||||
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
file_glob: true
|
||||
file: ./*.tar.bz2
|
||||
overwrite: true
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: asr-models
|
||||
|
||||
- name: Display decoding results for gigaspeech zipformer
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||
shell: bash
|
||||
|
Loading…
x
Reference in New Issue
Block a user