From 010c311fd6d4f12a5140d060cfcca263a4cf1a2a Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 25 Dec 2023 13:47:02 +0800 Subject: [PATCH] Use cpu docker to run yesno --- .../{docker => librispeech/ASR}/run.sh | 11 +- .github/scripts/yesno/ASR/run.sh | 84 ++++++++ .github/workflows/run-yesno-recipe.yml | 182 ++++-------------- .github/workflows/train-librispeech.yml | 6 +- 4 files changed, 131 insertions(+), 152 deletions(-) rename .github/scripts/{docker => librispeech/ASR}/run.sh (86%) create mode 100755 .github/scripts/yesno/ASR/run.sh diff --git a/.github/scripts/docker/run.sh b/.github/scripts/librispeech/ASR/run.sh similarity index 86% rename from .github/scripts/docker/run.sh rename to .github/scripts/librispeech/ASR/run.sh index aeb80b330..2fdfc6e32 100755 --- a/.github/scripts/docker/run.sh +++ b/.github/scripts/librispeech/ASR/run.sh @@ -1,11 +1,10 @@ #!/usr/bin/env bash -set -ex -cd /icefall -export PYTHONPATH=/icefall:$PYTHONPATH -python3 -c "import torch; print(torch.__file__)" -python3 -c "import torchaudio; print(torchaudio.__version__)" -python3 -c "import icefall; print(icefall.__file__)" +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} cd egs/librispeech/ASR diff --git a/.github/scripts/yesno/ASR/run.sh b/.github/scripts/yesno/ASR/run.sh new file mode 100755 index 000000000..c3f445d8c --- /dev/null +++ b/.github/scripts/yesno/ASR/run.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +cd egs/yesno/ASR + +log "data preparation" +./prepare.sh + +log "training" +python3 ./tdnn/train.py + +log "decoding" +python3 ./tdnn/decode.py + +log "export to pretrained.pt" + +python3 ./tdnn/export.py --epoch 14 --avg 2 + +python3 ./tdnn/pretrained.py \ + --checkpoint ./tdnn/exp/pretrained.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test exporting to torchscript" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test exporting to onnx" +python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 + +log "Test float32 model" +python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test int8 model" +python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test decoding with H" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained_decode_with_H.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --H ./data/lang_phone/H.fst \ + --tokens ./data/lang_phone/tokens.txt \ + ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ + ./download/waves_yesno/0_0_1_0_0_1_1_1.wav + +log "Test decoding with HL" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained_decode_with_HL.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HL ./data/lang_phone/HL.fst \ + --words ./data/lang_phone/words.txt \ + ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ + ./download/waves_yesno/0_0_1_0_0_1_1_1.wav + +log "Show generated files" +ls -lh tdnn/exp +ls -lh data/lang_phone diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml index 9ac848535..a99811815 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/run-yesno-recipe.yml @@ -20,166 +20,60 @@ on: push: branches: - master + - refactor-ci + pull_request: branches: - master + workflow_dispatch: + concurrency: group: run-yesno-recipe-${{ github.ref }} cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" run-yesno-recipe: - runs-on: ${{ matrix.os }} + needs: generate_build_matrix + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ubuntu-latest strategy: - matrix: - # os: [ubuntu-latest, macos-10.15] - # TODO: enable macOS for CPU testing - os: [ubuntu-latest] - python-version: [3.8] fail-fast: false + matrix: + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - name: Run the yesno recipe + uses: addnab/docker-run-action@v3 with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall - - name: Install libnsdfile and libsox - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo apt update - sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg - sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - pip install --no-deps --force-reinstall k2==1.24.4.dev20231021+cpu.torch1.13.1 -f https://k2-fsa.github.io/k2/cpu.html - pip install kaldifeat==1.25.1.dev20231022+cpu.torch1.13.1 -f https://csukuangfj.github.io/kaldifeat/cpu.html - - - name: Run yesno recipe - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - ./prepare.sh - python3 ./tdnn/train.py - python3 ./tdnn/decode.py - - - name: Test exporting to pretrained.pt - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 - - python3 ./tdnn/pretrained.py \ - --checkpoint ./tdnn/exp/pretrained.pt \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test exporting to torchscript - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test exporting to onnx - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 - - echo "Test float32 model" - python3 ./tdnn/onnx_pretrained.py \ - --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - echo "Test int8 model" - python3 ./tdnn/onnx_pretrained.py \ - --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test decoding with H - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained_decode_with_H.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --H ./data/lang_phone/H.fst \ - --tokens ./data/lang_phone/tokens.txt \ - ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ - ./download/waves_yesno/0_0_1_0_0_1_1_1.wav - - - name: Test decoding with HL - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained_decode_with_HL.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --HL ./data/lang_phone/HL.fst \ - --words ./data/lang_phone/words.txt \ - ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ - ./download/waves_yesno/0_0_1_0_0_1_1_1.wav - - - name: Show generated files - shell: bash - working-directory: ${{github.workspace}} - run: | - cd egs/yesno/ASR - ls -lh tdnn/exp - ls -lh data/lang_phone + .github/scripts/yesno/ASR/run.sh diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 619b5ccc3..79002a881 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -64,6 +64,8 @@ jobs: --volume ${{ github.workspace }}/:/icefall shell: bash run: | - ls -lh /icefall + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall - /icefall/.github/scripts/docker/run.sh + .github/scripts/librispeech/ASR/run.sh