Mirror of https://github.com/k2-fsa/icefall.git, synced 2025-08-08 09:32:20 +00:00.
Commit: "first commit (#1411)"
This commit is contained in:
parent
b0f70c9d04
commit
20a82c9abf
88
.github/scripts/multi-zh-hans.sh
vendored
Executable file
88
.github/scripts/multi-zh-hans.sh
vendored
Executable file
@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env bash
# CI helper: export and publish streaming Zipformer ONNX models for the
# egs/multi_zh-hans recipe.  Runs under `set -ex`, so every command is
# echoed and any failing command aborts the whole script.

set -ex

log() {
  # This function is from espnet.
  # Print a message prefixed with a timestamp and the call site
  # (file:line:function) for traceable CI logs.
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
||||
|
||||
# --- Download the pre-trained model from Hugging Face -----------------
# GIT_LFS_SKIP_SMUDGE=1 makes the clone cheap; only the LFS objects we
# actually need are pulled afterwards.
# Fix: quote all variable expansions so paths/URLs are immune to word
# splitting and globbing.
log "pwd: $PWD"

cd egs/multi_zh-hans/ASR

repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
repo=$(basename "$repo_url")

pushd "$repo"
cd exp/
# Pull only the averaged checkpoint and remove stale artifacts so the
# export below starts from a clean state.
git lfs pull --include pretrained.pt
rm -fv epoch-20.pt
rm -fv *.onnx
# The export script loads exp/epoch-<N>.pt, so alias the checkpoint.
ln -s pretrained.pt epoch-20.pt
cd ../data/lang_bpe_2000
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
popd
|
||||
|
||||
log "----------------------------------------"
log "Export streaming ONNX transducer models "
log "----------------------------------------"

# Export the streaming (causal) transducer to ONNX.
# --use-averaged-model 0 with --avg 1 --epoch 20 makes the script load
# exp/epoch-20.pt (the symlinked pretrained.pt) directly instead of
# averaging checkpoints.
# Fix: quote "$repo" paths against word splitting.
./zipformer/export-onnx-streaming.py \
  --exp-dir "$repo/exp" \
  --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
  --causal 1 \
  --avg 1 \
  --epoch 20 \
  --use-averaged-model 0 \
  --chunk-size 16 \
  --left-context-frames 128 \
  --use-ctc 0

ls -lh "$repo/exp"
|
||||
|
||||
log "------------------------------------------------------------"
log "Test export streaming ONNX transducer models (Python code) "
log "------------------------------------------------------------"

# Decode one test wave with the exported models.  $1 is the precision
# suffix for the encoder/joiner files: "" selects fp32, ".int8" selects
# the quantized pair (the decoder is always fp32).
# Fix: the two invocations differed only in this suffix, so the
# duplicated command line is factored into one helper; paths are quoted.
run_onnx_test() {
  local suffix=$1
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128${suffix}.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128${suffix}.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"
}

log "test fp32"
run_onnx_test ""

log "test int8"
run_onnx_test ".int8"
|
||||
|
||||
log "Upload models to huggingface"

# Identity for the CI commit created below.
git config --global user.name "k2-fsa"
git config --global user.email "xxx@gmail.com"

url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
dst=$(basename "$url")

# Copy the exported models, the token table, and the test waves into
# the destination repo.  (Glob patterns stay unquoted on purpose;
# everything else is quoted against word splitting.)
cp -v "$repo"/exp/*.onnx "$dst"
cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
mkdir -p "$dst/test_wavs"
cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"

cd "$dst"
git lfs track "*.onnx"
git add .
# Best effort: "|| true" keeps the job green when there is nothing to
# commit or the push is rejected.
# NOTE(review): the push URL embeds ${HF_TOKEN}; under `set -x` the full
# command (token included) is echoed to the CI log.  GitHub masks
# registered secrets in its logs, but consider wrapping this line in
# `set +x` / `set -x` to avoid relying on that.
git commit -m "upload model" && git push "https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst" main || true

log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
# Strip git metadata so the tarball contains only the model payload.
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv "$dst.tar.bz2" "$dst"
mv -v "$dst.tar.bz2" ../../../
|
84
.github/workflows/multi-zh-hans.yml
vendored
Normal file
84
.github/workflows/multi-zh-hans.yml
vendored
Normal file
@ -0,0 +1,84 @@
|
||||
# Workflow: export the multi_zh-hans streaming Zipformer model to ONNX,
# smoke-test it, and publish the artifacts (Hugging Face + a release
# tarball attached to k2-fsa/sherpa-onnx).
name: run-multi-zh-hans

on:
  push:
    branches:
      - master
      - upload-ctc-model

  pull_request:
    branches:
      - master

  workflow_dispatch:

# Only one run per ref at a time; newer pushes cancel in-flight runs.
concurrency:
  group: run-multi-zh-hans-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  multi-zh-hans:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # Quoted so YAML keeps the version a string (an unquoted 3.10
        # would parse as the float 3.1).
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          # Full history; some icefall scripts inspect git metadata.
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        # Fix: v2 runs on a deprecated Node runtime; v5 is the current
        # major with the same inputs.
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*

      - name: Cache kaldifeat
        id: my-cache
        # Fix: actions/cache@v2 is deprecated; v4 keeps the same inputs.
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22

      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh

      - name: export-model
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

          .github/scripts/multi-zh-hans.sh
          ls -lh

      - name: upload model to https://github.com/k2-fsa/sherpa-onnx
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
|
@ -614,7 +614,9 @@ def main():
|
||||
)
|
||||
logging.info(f"averaging {filenames}")
|
||||
model.to(device)
|
||||
model.load_state_dict(average_checkpoints(filenames, device=device))
|
||||
model.load_state_dict(
|
||||
average_checkpoints(filenames, device=device), strict=False
|
||||
)
|
||||
elif params.avg == 1:
|
||||
load_checkpoint(f"{params.exp_dir}/epoch-{params.epoch}.pt", model)
|
||||
else:
|
||||
@ -625,7 +627,9 @@ def main():
|
||||
filenames.append(f"{params.exp_dir}/epoch-{i}.pt")
|
||||
logging.info(f"averaging {filenames}")
|
||||
model.to(device)
|
||||
model.load_state_dict(average_checkpoints(filenames, device=device))
|
||||
model.load_state_dict(
|
||||
average_checkpoints(filenames, device=device), strict=False
|
||||
)
|
||||
else:
|
||||
if params.iter > 0:
|
||||
filenames = find_checkpoints(params.exp_dir, iteration=-params.iter)[
|
||||
@ -653,7 +657,8 @@ def main():
|
||||
filename_start=filename_start,
|
||||
filename_end=filename_end,
|
||||
device=device,
|
||||
)
|
||||
),
|
||||
strict=False,
|
||||
)
|
||||
else:
|
||||
assert params.avg > 0, params.avg
|
||||
@ -671,7 +676,8 @@ def main():
|
||||
filename_start=filename_start,
|
||||
filename_end=filename_end,
|
||||
device=device,
|
||||
)
|
||||
),
|
||||
strict=False,
|
||||
)
|
||||
|
||||
model.to("cpu")
|
||||
|
Loading…
x
Reference in New Issue
Block a user