diff --git a/.github/scripts/multi-zh-hans.sh b/.github/scripts/multi-zh-hans.sh
deleted file mode 100755
index e254419ff..000000000
--- a/.github/scripts/multi-zh-hans.sh
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-git config --global user.name "k2-fsa"
-git config --global user.email "csukuangfj@gmail.com"
-git config --global lfs.allowincompletepush true
-
-log() {
-  # This function is from espnet
-  local fname=${BASH_SOURCE[1]##*/}
-  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
-}
-
-log "pwd: $PWD"
-
-cd egs/multi_zh-hans/ASR
-
-repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
-log "Downloading pre-trained model from $repo_url"
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
-pushd $repo
-cd exp
-git lfs pull --include pretrained.pt
-ln -s pretrained.pt epoch-99.pt
-cd ../data/lang_bpe_2000
-ls -lh
-git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
-git lfs pull --include "*.model"
-ls -lh
-popd
-
-log "--------------------------------------------"
-log "Export non-streaming ONNX transducer models "
-log "--------------------------------------------"
-./zipformer/export-onnx.py \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  --use-averaged-model 0 \
-  --epoch 99 \
-  --avg 1 \
-  --exp-dir $repo/exp \
-  --causal False
-
-ls -lh $repo/exp
-
-./zipformer/onnx_pretrained.py \
-  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
-  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
-  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  $repo/test_wavs/DEV_T0000000000.wav \
-  $repo/test_wavs/DEV_T0000000001.wav \
-  $repo/test_wavs/DEV_T0000000002.wav \
-  $repo/test_wavs/TEST_MEETING_T0000000113.wav \
-  $repo/test_wavs/TEST_MEETING_T0000000219.wav \
-  $repo/test_wavs/TEST_MEETING_T0000000351.wav
-
-rm -rf $repo
-
-repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
-log "Downloading pre-trained model from $repo_url"
-GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
-repo=$(basename $repo_url)
-
-pushd $repo
-cd exp/
-git lfs pull --include pretrained.pt
-rm -fv epoch-20.pt
-rm -fv *.onnx
-ln -s pretrained.pt epoch-20.pt
-cd ../data/lang_bpe_2000
-ls -lh
-git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
-git lfs pull --include "*.model"
-ls -lh
-popd
-
-log "----------------------------------------"
-log "Export streaming ONNX CTC models "
-log "----------------------------------------"
-./zipformer/export-onnx-streaming-ctc.py \
-  --exp-dir $repo/exp \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  --causal 1 \
-  --avg 1 \
-  --epoch 20 \
-  --use-averaged-model 0 \
-  --chunk-size 16 \
-  --left-context-frames 128 \
-  --use-ctc 1
-
-ls -lh $repo/exp/
-
-log "------------------------------------------------------------"
-log "Test exported streaming ONNX CTC models (greedy search) "
-log "------------------------------------------------------------"
-
-test_wavs=(
-DEV_T0000000000.wav
-DEV_T0000000001.wav
-DEV_T0000000002.wav
-TEST_MEETING_T0000000113.wav
-TEST_MEETING_T0000000219.wav
-TEST_MEETING_T0000000351.wav
-)
-
-for w in ${test_wavs[@]}; do
-  ./zipformer/onnx_pretrained-streaming-ctc.py \
-    --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
-    --tokens $repo/data/lang_bpe_2000/tokens.txt \
-    $repo/test_wavs/$w
-done
-
-log "Upload onnx CTC models to huggingface"
-url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
-GIT_LFS_SKIP_SMUDGE=1 git clone $url
-dst=$(basename $url)
-cp -v $repo/exp/ctc*.onnx $dst
-cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
-cp -v $repo/data/lang_bpe_2000/bpe.model $dst
-mkdir -p $dst/test_wavs
-cp -v $repo/test_wavs/*.wav $dst/test_wavs
-cd $dst
-git lfs track "*.onnx" "bpe.model"
-ls -lh
-file bpe.model
-git status
-git add .
-git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
-
-log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
-rm -rf .git
-rm -fv .gitattributes
-cd ..
-tar cjfv $dst.tar.bz2 $dst
-ls -lh *.tar.bz2
-mv -v $dst.tar.bz2 ../../../
-
-log "----------------------------------------"
-log "Export streaming ONNX transducer models "
-log "----------------------------------------"
-
-./zipformer/export-onnx-streaming.py \
-  --exp-dir $repo/exp \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  --causal 1 \
-  --avg 1 \
-  --epoch 20 \
-  --use-averaged-model 0 \
-  --chunk-size 16 \
-  --left-context-frames 128 \
-  --use-ctc 0
-
-ls -lh $repo/exp
-
-log "------------------------------------------------------------"
-log "Test exported streaming ONNX transducer models (Python code)"
-log "------------------------------------------------------------"
-
-log "test fp32"
-./zipformer/onnx_pretrained-streaming.py \
-  --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  $repo/test_wavs/DEV_T0000000000.wav
-
-log "test int8"
-./zipformer/onnx_pretrained-streaming.py \
-  --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
-  --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
-  --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
-  --tokens $repo/data/lang_bpe_2000/tokens.txt \
-  $repo/test_wavs/DEV_T0000000000.wav
-
-log "Upload onnx transducer models to huggingface"
-
-url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
-GIT_LFS_SKIP_SMUDGE=1 git clone $url
-dst=$(basename $url)
-cp -v $repo/exp/encoder*.onnx $dst
-cp -v $repo/exp/decoder*.onnx $dst
-cp -v $repo/exp/joiner*.onnx $dst
-cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
-cp -v $repo/data/lang_bpe_2000/bpe.model $dst
-mkdir -p $dst/test_wavs
-cp -v $repo/test_wavs/*.wav $dst/test_wavs
-cd $dst
-git lfs track "*.onnx" bpe.model
-git add .
-git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
-
-log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
-rm -rf .git
-rm -fv .gitattributes
-cd ..
-tar cjfv $dst.tar.bz2 $dst
-ls -lh *.tar.bz2
-mv -v $dst.tar.bz2 ../../../
diff --git a/.github/scripts/multi_zh-hans/ASR/run.sh b/.github/scripts/multi_zh-hans/ASR/run.sh
new file mode 100755
index 000000000..c0b804d9e
--- /dev/null
+++ b/.github/scripts/multi_zh-hans/ASR/run.sh
@@ -0,0 +1,210 @@
+#!/usr/bin/env bash
+
+set -ex
+
+git config --global user.name "k2-fsa"
+git config --global user.email "csukuangfj@gmail.com"
+git config --global lfs.allowincompletepush true
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+cd egs/multi_zh-hans/ASR
+
+log "pwd: $PWD"
+
+function run_2023_9_2() {
+  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
+  log "Downloading pre-trained model from $repo_url"
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  repo=$(basename $repo_url)
+  pushd $repo
+  cd exp
+  git lfs pull --include pretrained.pt
+  ln -s pretrained.pt epoch-99.pt
+  cd ../data/lang_bpe_2000
+  ls -lh
+  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
+  git lfs pull --include "*.model"
+  ls -lh
+  popd
+
+  log "--------------------------------------------"
+  log "Export non-streaming ONNX transducer models "
+  log "--------------------------------------------"
+  ./zipformer/export-onnx.py \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    --use-averaged-model 0 \
+    --epoch 99 \
+    --avg 1 \
+    --exp-dir $repo/exp \
+    --causal False
+
+  ls -lh $repo/exp
+
+  ./zipformer/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav \
+    $repo/test_wavs/DEV_T0000000001.wav \
+    $repo/test_wavs/DEV_T0000000002.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000113.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000219.wav \
+    $repo/test_wavs/TEST_MEETING_T0000000351.wav
+
+  rm -rf $repo
+}
+
+function run_2023_11_05_streaming() {
+  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
+  log "Downloading pre-trained model from $repo_url"
+  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
+  repo=$(basename $repo_url)
+
+  pushd $repo
+  cd exp/
+  git lfs pull --include pretrained.pt
+  rm -fv epoch-20.pt
+  rm -fv *.onnx
+  ln -s pretrained.pt epoch-20.pt
+  cd ../data/lang_bpe_2000
+  ls -lh
+  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
+  git lfs pull --include "*.model"
+  ls -lh
+  popd
+
+  log "----------------------------------------"
+  log "Export streaming ONNX CTC models "
+  log "----------------------------------------"
+  ./zipformer/export-onnx-streaming-ctc.py \
+    --exp-dir $repo/exp \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    --causal 1 \
+    --avg 1 \
+    --epoch 20 \
+    --use-averaged-model 0 \
+    --chunk-size 16 \
+    --left-context-frames 128 \
+    --use-ctc 1
+
+  ls -lh $repo/exp/
+
+  log "------------------------------------------------------------"
+  log "Test exported streaming ONNX CTC models (greedy search) "
+  log "------------------------------------------------------------"
+
+  test_wavs=(
+    DEV_T0000000000.wav
+    DEV_T0000000001.wav
+    DEV_T0000000002.wav
+    TEST_MEETING_T0000000113.wav
+    TEST_MEETING_T0000000219.wav
+    TEST_MEETING_T0000000351.wav
+  )
+
+  for w in ${test_wavs[@]}; do
+    ./zipformer/onnx_pretrained-streaming-ctc.py \
+      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
+      --tokens $repo/data/lang_bpe_2000/tokens.txt \
+      $repo/test_wavs/$w
+  done
+
+  log "Upload onnx CTC models to huggingface"
+  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
+  GIT_LFS_SKIP_SMUDGE=1 git clone $url
+  dst=$(basename $url)
+  cp -v $repo/exp/ctc*.onnx $dst
+  cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
+  cp -v $repo/data/lang_bpe_2000/bpe.model $dst
+  mkdir -p $dst/test_wavs
+  cp -v $repo/test_wavs/*.wav $dst/test_wavs
+  cd $dst
+  git lfs track "*.onnx" "bpe.model"
+  ls -lh
+  file bpe.model
+  git status
+  git add .
+  git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
+
+  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
+  rm -rf .git
+  rm -fv .gitattributes
+  cd ..
+  tar cjfv $dst.tar.bz2 $dst
+  ls -lh *.tar.bz2
+  mv -v $dst.tar.bz2 ../../../
+
+  log "----------------------------------------"
+  log "Export streaming ONNX transducer models "
+  log "----------------------------------------"
+
+  ./zipformer/export-onnx-streaming.py \
+    --exp-dir $repo/exp \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    --causal 1 \
+    --avg 1 \
+    --epoch 20 \
+    --use-averaged-model 0 \
+    --chunk-size 16 \
+    --left-context-frames 128 \
+    --use-ctc 0
+
+  ls -lh $repo/exp
+
+  log "------------------------------------------------------------"
+  log "Test exported streaming ONNX transducer models (Python code)"
+  log "------------------------------------------------------------"
+
+  log "test fp32"
+  ./zipformer/onnx_pretrained-streaming.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav
+
+  log "test int8"
+  ./zipformer/onnx_pretrained-streaming.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
+    --tokens $repo/data/lang_bpe_2000/tokens.txt \
+    $repo/test_wavs/DEV_T0000000000.wav
+}
+
+function run_2023_12_12_streaming() {
+  log "Upload onnx transducer models to huggingface"
+
+  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
+  GIT_LFS_SKIP_SMUDGE=1 git clone $url
+  dst=$(basename $url)
+  cp -v $repo/exp/encoder*.onnx $dst
+  cp -v $repo/exp/decoder*.onnx $dst
+  cp -v $repo/exp/joiner*.onnx $dst
+  cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
+  cp -v $repo/data/lang_bpe_2000/bpe.model $dst
+  mkdir -p $dst/test_wavs
+  cp -v $repo/test_wavs/*.wav $dst/test_wavs
+  cd $dst
+  git lfs track "*.onnx" bpe.model
+  git add .
+  git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
+
+  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
+  rm -rf .git
+  rm -fv .gitattributes
+  cd ..
+  tar cjfv $dst.tar.bz2 $dst
+  ls -lh *.tar.bz2
+  mv -v $dst.tar.bz2 ../../../
+}
+
+run_2023_9_2
+run_2023_11_05_streaming
+run_2023_12_12_streaming
diff --git a/.github/workflows/multi-zh-hans.yml b/.github/workflows/multi-zh-hans.yml
index 9081047de..db90efa15 100644
--- a/.github/workflows/multi-zh-hans.yml
+++ b/.github/workflows/multi-zh-hans.yml
@@ -15,47 +15,68 @@ permissions:
   contents: write

 jobs:
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.0.0" --python-version "3.10"
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.0.0" --python-version "3.10")
+          echo "::set-output name=matrix::${MATRIX}"
   multi-zh-hans:
-    runs-on: ${{ matrix.os }}
+    needs: generate_build_matrix
+    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
+    runs-on: ubuntu-latest
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest]
-        python-version: [3.8]
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

     steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0

-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'pip'
-          cache-dependency-path: '**/requirements-ci.txt'
-
-      - name: Install Python dependencies
-        run: |
-          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
-          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf==3.20.*
-
-      - name: Cache kaldifeat
-        id: my-cache
-        uses: actions/cache@v2
-        with:
-          path: |
-            ~/tmp/kaldifeat
-          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
-
-      - name: Install kaldifeat
-        if: steps.my-cache.outputs.cache-hit != 'true'
+      - name: Free space
         shell: bash
         run: |
-          .github/scripts/install-kaldifeat.sh
+          df -h
+          rm -rf /opt/hostedtoolcache
+          df -h
+          echo "pwd: $PWD"
+          echo "github.workspace ${{ github.workspace }}"
+
+      - name: Test with multi_zh-hans
+        uses: addnab/docker-run-action@v3
+        with:
+          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+          options: |
+            --volume ${{ github.workspace }}/:/icefall
+          shell: bash
+          run: |
+            export PYTHONPATH=/icefall:$PYTHONPATH
+            export HF_TOKEN=${{ secrets.HF_TOKEN }}
+            cd /icefall
+            git config --global --add safe.directory /icefall
+
+            .github/scripts/multi_zh-hans/ASR/run.sh
+
+      - name: Show models
+        shell: bash
+        run: |
+          ls -lh *.tar.bz2

       - name: export-model
+        if: false
         shell: bash
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming-ctc.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming-ctc.py
index 1eba6093b..3d32b0a9c 100755
--- a/egs/librispeech/ASR/zipformer/export-onnx-streaming-ctc.py
+++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming-ctc.py
@@ -136,6 +136,13 @@ def get_parser():
         help="The context size in the decoder. 1 means bigram; 2 means tri-gram",
     )

+    parser.add_argument(
+        "--use-whisper-features",
+        type=str2bool,
+        default=False,
+        help="True to use whisper features. Must match the one used in training",
+    )
+
     add_model_arguments(parser)

     return parser
@@ -270,6 +277,7 @@ def export_streaming_ctc_model_onnx(
     model: OnnxModel,
     encoder_filename: str,
     opset_version: int = 11,
+    use_whisper_features: bool = False,
 ) -> None:
     model.encoder.__class__.forward = model.encoder.__class__.streaming_forward

@@ -367,6 +375,10 @@
         "value_head_dims": value_head_dims,
         "num_heads": num_heads,
     }
+
+    if use_whisper_features:
+        meta_data["feature"] = "whisper"
+
     logging.info(f"meta_data: {meta_data}")

     for i in range(len(init_state[:-2]) // 6):
@@ -547,6 +559,7 @@ def main():
         model,
         model_filename,
         opset_version=opset_version,
+        use_whisper_features=params.use_whisper_features,
     )
     logging.info(f"Exported model to {model_filename}")

diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
index a35eb5287..780bc3c45 100755
--- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
+++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py
@@ -162,6 +162,13 @@ def get_parser():
         help="Whether to export models in fp16",
     )

+    parser.add_argument(
+        "--use-whisper-features",
+        type=str2bool,
+        default=False,
+        help="True to use whisper features. Must match the one used in training",
+    )
+
     add_model_arguments(parser)

     return parser
@@ -342,6 +349,7 @@ def export_encoder_model_onnx(
     encoder_filename: str,
     opset_version: int = 11,
     feature_dim: int = 80,
+    use_whisper_features: bool = False,
 ) -> None:
     encoder_model.encoder.__class__.forward = (
         encoder_model.encoder.__class__.streaming_forward
@@ -441,6 +449,9 @@
         "value_head_dims": value_head_dims,
         "num_heads": num_heads,
     }
+    if use_whisper_features:
+        meta_data["feature"] = "whisper"
+
     logging.info(f"meta_data: {meta_data}")

     for i in range(len(init_state[:-2]) // 6):
@@ -734,6 +745,7 @@ def main():
         encoder_filename,
         opset_version=opset_version,
         feature_dim=params.feature_dim,
+        use_whisper_features=params.use_whisper_features,
     )
     logging.info(f"Exported encoder to {encoder_filename}")