mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
Upload gigaspeech zipformer models in CI (#1412)
This commit is contained in:
parent
20a82c9abf
commit
9e9fe7954d
3
.github/scripts/multi-zh-hans.sh
vendored
3
.github/scripts/multi-zh-hans.sh
vendored
@ -45,7 +45,7 @@ log "----------------------------------------"
|
|||||||
ls -lh $repo/exp
|
ls -lh $repo/exp
|
||||||
|
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "Test export streaming ONNX transducer models (Python code) "
|
log "Test exported streaming ONNX transducer models (Python code)"
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
|
|
||||||
log "test fp32"
|
log "test fp32"
|
||||||
@ -73,6 +73,7 @@ GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
|||||||
dst=$(basename $url)
|
dst=$(basename $url)
|
||||||
cp -v $repo/exp/*.onnx $dst
|
cp -v $repo/exp/*.onnx $dst
|
||||||
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
||||||
|
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
||||||
mkdir -p $dst/test_wavs
|
mkdir -p $dst/test_wavs
|
||||||
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
||||||
cd $dst
|
cd $dst
|
||||||
|
@ -26,16 +26,80 @@ git lfs pull --include "data/lang_bpe_500/bpe.model"
|
|||||||
git lfs pull --include "data/lang_bpe_500/tokens.txt"
|
git lfs pull --include "data/lang_bpe_500/tokens.txt"
|
||||||
git lfs pull --include "exp/jit_script.pt"
|
git lfs pull --include "exp/jit_script.pt"
|
||||||
git lfs pull --include "exp/pretrained.pt"
|
git lfs pull --include "exp/pretrained.pt"
|
||||||
ln -s pretrained.pt epoch-99.pt
|
rm epoch-30.pt
|
||||||
ls -lh *.pt
|
ln -s pretrained.pt epoch-30.pt
|
||||||
|
rm *.onnx
|
||||||
|
ls -lh
|
||||||
popd
|
popd
|
||||||
|
|
||||||
|
log "----------------------------------------"
|
||||||
|
log "Export ONNX transducer models "
|
||||||
|
log "----------------------------------------"
|
||||||
|
|
||||||
|
./zipformer/export-onnx.py \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
--use-averaged-model 0 \
|
||||||
|
--epoch 30 \
|
||||||
|
--avg 1 \
|
||||||
|
--exp-dir $repo/exp
|
||||||
|
|
||||||
|
ls -lh $repo/exp
|
||||||
|
|
||||||
|
log "------------------------------------------------------------"
|
||||||
|
log "Test exported ONNX transducer models (Python code) "
|
||||||
|
log "------------------------------------------------------------"
|
||||||
|
|
||||||
|
log "test fp32"
|
||||||
|
./zipformer/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
log "test int8"
|
||||||
|
./zipformer/onnx_pretrained.py \
|
||||||
|
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.int8.onnx \
|
||||||
|
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
|
||||||
|
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.int8.onnx \
|
||||||
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
|
$repo/test_wavs/1089-134686-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0001.wav \
|
||||||
|
$repo/test_wavs/1221-135766-0002.wav
|
||||||
|
|
||||||
|
log "Upload models to huggingface"
|
||||||
|
git config --global user.name "k2-fsa"
|
||||||
|
git config --global user.email "xxx@gmail.com"
|
||||||
|
|
||||||
|
url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
|
||||||
|
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
||||||
|
dst=$(basename $url)
|
||||||
|
cp -v $repo/exp/*.onnx $dst
|
||||||
|
cp -v $repo/data/lang_bpe_500/tokens.txt $dst
|
||||||
|
cp -v $repo/data/lang_bpe_500/bpe.model $dst
|
||||||
|
mkdir -p $dst/test_wavs
|
||||||
|
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
||||||
|
cd $dst
|
||||||
|
git lfs track "*.onnx"
|
||||||
|
git add .
|
||||||
|
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
|
||||||
|
|
||||||
|
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
||||||
|
rm -rf .git
|
||||||
|
rm -fv .gitattributes
|
||||||
|
cd ..
|
||||||
|
tar cjfv $dst.tar.bz2 $dst
|
||||||
|
ls -lh
|
||||||
|
mv -v $dst.tar.bz2 ../../../
|
||||||
|
|
||||||
log "Export to torchscript model"
|
log "Export to torchscript model"
|
||||||
./zipformer/export.py \
|
./zipformer/export.py \
|
||||||
--exp-dir $repo/exp \
|
--exp-dir $repo/exp \
|
||||||
--use-averaged-model false \
|
--use-averaged-model false \
|
||||||
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||||
--epoch 99 \
|
--epoch 30 \
|
||||||
--avg 1 \
|
--avg 1 \
|
||||||
--jit 1
|
--jit 1
|
||||||
|
|
||||||
@ -67,7 +131,7 @@ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
|||||||
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
||||||
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
||||||
mkdir -p zipformer/exp
|
mkdir -p zipformer/exp
|
||||||
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt
|
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
|
||||||
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
||||||
|
|
||||||
ls -lh data
|
ls -lh data
|
||||||
@ -83,7 +147,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
|
|||||||
|
|
||||||
./zipformer/decode.py \
|
./zipformer/decode.py \
|
||||||
--decoding-method $method \
|
--decoding-method $method \
|
||||||
--epoch 999 \
|
--epoch 30 \
|
||||||
--avg 1 \
|
--avg 1 \
|
||||||
--use-averaged-model 0 \
|
--use-averaged-model 0 \
|
||||||
--max-duration $max_duration \
|
--max-duration $max_duration \
|
||||||
|
5
.github/workflows/multi-zh-hans.yml
vendored
5
.github/workflows/multi-zh-hans.yml
vendored
@ -2,11 +2,6 @@ name: run-multi-zh-hans
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- upload-ctc-model
|
|
||||||
|
|
||||||
pull_request:
|
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
|
|
||||||
|
@ -21,6 +21,7 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
|
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [labeled]
|
types: [labeled]
|
||||||
|
|
||||||
@ -33,6 +34,8 @@ on:
|
|||||||
# nightly build at 15:50 UTC time every day
|
# nightly build at 15:50 UTC time every day
|
||||||
- cron: "50 15 * * *"
|
- cron: "50 15 * * *"
|
||||||
|
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
|
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
@ -85,6 +88,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
||||||
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
||||||
|
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
mkdir -p egs/gigaspeech/ASR/data
|
mkdir -p egs/gigaspeech/ASR/data
|
||||||
ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank
|
ln -sfv ~/tmp/fbank-libri egs/gigaspeech/ASR/data/fbank
|
||||||
@ -97,6 +101,16 @@ jobs:
|
|||||||
|
|
||||||
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
|
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
|
||||||
|
|
||||||
|
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
|
||||||
|
uses: svenstaro/upload-release-action@v2
|
||||||
|
with:
|
||||||
|
file_glob: true
|
||||||
|
file: ./*.tar.bz2
|
||||||
|
overwrite: true
|
||||||
|
repo_name: k2-fsa/sherpa-onnx
|
||||||
|
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||||
|
tag: asr-models
|
||||||
|
|
||||||
- name: Display decoding results for gigaspeech zipformer
|
- name: Display decoding results for gigaspeech zipformer
|
||||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||||
shell: bash
|
shell: bash
|
||||||
|
Loading…
x
Reference in New Issue
Block a user