added CI tests for multi_zh-hans

This commit is contained in:
jinzr 2023-09-07 15:47:03 +08:00
parent 3aa5104dd0
commit d280badcca
2 changed files with 206 additions and 0 deletions

122
.github/scripts/run-multi-zh_hans-zipformer.sh vendored Executable file
View File

@ -0,0 +1,122 @@
#!/usr/bin/env bash
set -e
# Print a timestamped message tagged with the location of the caller.
# (This helper originates from espnet.)
#
# Arguments: $* - the message; the words are joined with spaces and go
#                 through `echo -e`, so backslash escapes are interpreted.
# Outputs:   "YYYY-mm-dd HH:MM:SS (<file>:<line>:<function>) <message>"
#            on stdout.
log() {
  # <basename of the calling file>:<line of the call>:<calling function>
  local call_site="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${call_site}) $*"
}
# Every recipe script below is invoked relative to the recipe directory.
cd egs/multi-zh_hans/ASR

repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone "$repo_url"
# basename drops the trailing slash, leaving the last path component of the
# URL, which is used below as the directory of the cloned repository.
repo=$(basename "$repo_url")

log "Display test files"
tree "$repo/"
ls -lh "$repo"/test_wavs/*.wav

# The commands that follow address the checkpoint as epoch 99 (--epoch 99,
# epoch-99.pt), so expose epoch-20.pt under that name.
pushd "$repo/exp"
ln -s epoch-20.pt epoch-99.pt
popd
# Arguments shared by every export.py invocation in this section; only the
# trailing format flag differs between the calls.
export_args=(
  --exp-dir "$repo/exp"
  --lang-dir "$repo/data/lang_bpe_2000"
  --epoch 99
  --avg 1
)

log "Test exporting to ONNX format"
./zipformer/export.py "${export_args[@]}" --onnx 1

log "Export to torchscript model"
./zipformer/export.py "${export_args[@]}" --jit 1
./zipformer/export.py "${export_args[@]}" --jit-trace 1

# List the exported files; under `set -e` an unmatched glob makes ls fail,
# which aborts the script if nothing was exported.
ls -lh "$repo"/exp/*.onnx
ls -lh "$repo"/exp/*.pt
log "Decode with ONNX models"

# Run onnx_check.py on the exported ONNX files together with the
# torchscript model cpu_jit.pt.
./zipformer/onnx_check.py \
  --jit-filename "$repo/exp/cpu_jit.pt" \
  --onnx-encoder-filename "$repo/exp/encoder.onnx" \
  --onnx-decoder-filename "$repo/exp/decoder.onnx" \
  --onnx-joiner-filename "$repo/exp/joiner.onnx" \
  --onnx-joiner-encoder-proj-filename "$repo/exp/joiner_encoder_proj.onnx" \
  --onnx-joiner-decoder-proj-filename "$repo/exp/joiner_decoder_proj.onnx"

# Decode the three test waves with the ONNX models.
./zipformer/onnx_pretrained.py \
  --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
  --encoder-model-filename "$repo/exp/encoder.onnx" \
  --decoder-model-filename "$repo/exp/decoder.onnx" \
  --joiner-model-filename "$repo/exp/joiner.onnx" \
  --joiner-encoder-proj-model-filename "$repo/exp/joiner_encoder_proj.onnx" \
  --joiner-decoder-proj-model-filename "$repo/exp/joiner_decoder_proj.onnx" \
  "$repo/test_wavs/DEV_T0000000000.wav" \
  "$repo/test_wavs/DEV_T0000000001.wav" \
  "$repo/test_wavs/DEV_T0000000002.wav"
# Decode the test waves with both sets of torchscript files: *_jit_trace.pt
# first, then *_jit_script.pt. Each run gets its own log heading so the CI
# output shows which set of files a result (or failure) belongs to.
for jit_kind in trace script; do
  log "Decode with models exported by torch.jit.${jit_kind}()"

  ./zipformer/jit_pretrained.py \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    --encoder-model-filename "$repo/exp/encoder_jit_${jit_kind}.pt" \
    --decoder-model-filename "$repo/exp/decoder_jit_${jit_kind}.pt" \
    --joiner-model-filename "$repo/exp/joiner_jit_${jit_kind}.pt" \
    "$repo/test_wavs/DEV_T0000000000.wav" \
    "$repo/test_wavs/DEV_T0000000001.wav" \
    "$repo/test_wavs/DEV_T0000000002.wav"
done
# Greedy search on the checkpoint, once for each --max-sym-per-frame value.
for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"

  ./zipformer/pretrained.py \
    --checkpoint "$repo/exp/epoch-99.pt" \
    --lang-dir "$repo/data/lang_bpe_2000" \
    --decoding-method greedy_search \
    --max-sym-per-frame "$sym" \
    "$repo/test_wavs/DEV_T0000000000.wav" \
    "$repo/test_wavs/DEV_T0000000001.wav" \
    "$repo/test_wavs/DEV_T0000000002.wav"
done
# Run each beam-search style decoding method on the checkpoint; the same
# --beam-size is passed to all of them.
for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"

  ./zipformer/pretrained.py \
    --decoding-method "$method" \
    --beam-size 4 \
    --checkpoint "$repo/exp/epoch-99.pt" \
    --lang-dir "$repo/data/lang_bpe_2000" \
    "$repo/test_wavs/DEV_T0000000000.wav" \
    "$repo/test_wavs/DEV_T0000000001.wav" \
    "$repo/test_wavs/DEV_T0000000002.wav"
done

View File

@ -0,0 +1,84 @@
# Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-multi-zh_hans-zipformer

on:
  # Every push to master.
  push:
    branches: [master]
  # Pull requests, but only at the moment a label is added; the job-level
  # `if` condition decides which labels actually start the job.
  pull_request:
    types:
      - labeled

# One concurrency group per ref: a newly started run cancels the run that
# is still in progress for the same ref.
concurrency:
  group: run_multi-zh_hans_zipformer-${{ github.ref }}
  cancel-in-progress: true

jobs:
  run_multi-zh_hans_zipformer:
    # Run on pushes, or when one of the labels `onnx`, `ready` or
    # `multi-zh_hans` is added to a pull request.
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'multi-zh_hans'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so YAML keeps the version as a string; a bare number is
        # parsed as a float (e.g. 3.10 would become 3.1).
        python-version: ["3.8"]

      fail-fast: false

    steps:
      - uses: actions/checkout@v4
        with:
          # Full history rather than a shallow clone.
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          # One `pip install` per non-comment line of requirements-ci.txt.
          # (-L and -n are mutually exclusive in xargs; only -L is needed.)
          grep -v '^#' ./requirements-ci.txt | xargs -L 1 pip install
          # Reinstall protobuf 3.20.* without using a binary wheel.
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*

      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22

      - name: Install kaldifeat
        # Skipped when ~/tmp/kaldifeat was restored from the cache.
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh

      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          sudo apt-get -qq install git-lfs tree
          # Make the repository checkout and kaldifeat (Python package and
          # build/lib) importable.
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

          .github/scripts/run-multi-zh_hans-zipformer.sh