Generate the dependency matrix by code for GitHub Actions (#1431)

2025-08-08 09:32:20 +00:00 · 2023-12-25 19:41:09 +08:00 · 2023-12-25 19:41:09 +08:00 · c855a58cfd
commit c855a58cfd
parent e5bb1ae86c
8 changed files with 279 additions and 183 deletions
--- a/.github/scripts/docker/Dockerfile
+++ b/.github/scripts/docker/Dockerfile
@ -31,10 +31,12 @@ LABEL github_repo="https://github.com/k2-fsa/icefall"
 RUN pip install --no-cache-dir \
      torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
      k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
+      \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
      dill \
      graphviz \
+      kaldi-decoder \
      kaldi_native_io \
      kaldialign \
      kaldifst \
--- a/.github/scripts/docker/generate_build_matrix.py
+++ b/.github/scripts/docker/generate_build_matrix.py
@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)
+
+
+import json
+
+
+def version_gt(a, b):
+    """Return True if version ``a`` is strictly newer than version ``b``.
+
+    Only the first two dot-separated components (major and minor) are
+    considered, and they are compared as integers. Comparing them as strings
+    is wrong because "9" > "10" lexicographically, which would make
+    Python 3.9 look newer than Python 3.10.
+
+    Args:
+      a: A version string with at least two numeric components,
+        e.g., "3.10" or "2.1.0".
+      b: A version string in the same format as ``a``.
+    Returns:
+      True if (major, minor) of ``a`` is greater than (major, minor) of ``b``.
+    Raises:
+      ValueError: If either string has fewer than two components or a
+        non-numeric major/minor component.
+    """
+    a_major, a_minor = map(int, a.split(".")[:2])
+    b_major, b_minor = map(int, b.split(".")[:2])
+    # Tuple comparison orders by major first, then by minor.
+    return (a_major, a_minor) > (b_major, b_minor)
+
+
+def version_ge(a, b):
+    """Return True if version ``a`` is newer than or equal to version ``b``.
+
+    Only the first two dot-separated components (major and minor) are
+    considered, and they are compared as integers. Comparing them as strings
+    is wrong because "9" >= "10" lexicographically, which would make
+    Python 3.9 look at least as new as Python 3.10.
+
+    Args:
+      a: A version string with at least two numeric components,
+        e.g., "3.10" or "2.1.0".
+      b: A version string in the same format as ``a``.
+    Returns:
+      True if (major, minor) of ``a`` is greater than or equal to
+      (major, minor) of ``b``.
+    Raises:
+      ValueError: If either string has fewer than two components or a
+        non-numeric major/minor component.
+    """
+    a_major, a_minor = map(int, a.split(".")[:2])
+    b_major, b_minor = map(int, b.split(".")[:2])
+    # Tuple comparison orders by major first, then by minor.
+    return (a_major, a_minor) >= (b_major, b_minor)
+
+
+def get_torchaudio_version(torch_version):
+    """Return the torchaudio version to install for a given torch version.
+
+    Args:
+      torch_version: A torch version string, e.g., "2.0.1".
+    Returns:
+      The torchaudio version listed in the table below for the four torch
+      releases whose torchaudio version differs; otherwise ``torch_version``
+      itself is returned unchanged.
+    """
+    # (torch, torchaudio) pairs for releases where the two numbers differ.
+    exceptions = (
+        ("1.13.0", "0.13.0"),
+        ("1.13.1", "0.13.1"),
+        ("2.0.0", "2.0.1"),
+        ("2.0.1", "2.0.2"),
+    )
+    for known_torch, paired_torchaudio in exceptions:
+        if torch_version == known_torch:
+            return paired_torchaudio
+
+    # Every other release uses the same version number for both packages.
+    return torch_version
+
+
+def get_matrix():
+    """Build the list of build/test configurations for the CI job matrix.
+
+    Returns:
+      A list of dicts, one per accepted (python, torch) combination, in
+      python-major order. Each dict has the keys "k2-version",
+      "kaldifeat-version", "version", "python-version", "torch-version"
+      and "torchaudio-version".
+    """
+    python_versions = ["3.8", "3.9", "3.10", "3.11"]
+    torch_versions = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
+
+    # Fields that are identical in every matrix entry.
+    common = {
+        "k2-version": "1.24.4.dev20231220",
+        "kaldifeat-version": "1.25.3.dev20231221",
+        "version": "1.1",
+    }
+
+    def keep(py_ver, torch_ver):
+        # torchaudio <= 1.13.x supports only python <= 3.10
+        #
+        # A combination is dropped when python is newer than 3.10 and torch
+        # is not newer than 2.0. Since version_gt() looks only at
+        # major.minor, torch 2.0.x is "not newer than 2.0" and is dropped
+        # along with 1.13.x.
+        return not version_gt(py_ver, "3.10") or version_gt(torch_ver, "2.0")
+
+    return [
+        {
+            **common,
+            "python-version": py_ver,
+            "torch-version": torch_ver,
+            "torchaudio-version": get_torchaudio_version(torch_ver),
+        }
+        for py_ver in python_versions
+        for torch_ver in torch_versions
+        if keep(py_ver, torch_ver)
+    ]
+
+
+def main():
+    """Print the build matrix to stdout as a single-line JSON object.
+
+    The matrix entries are placed under the "include" key.
+    """
+    payload = {"include": get_matrix()}
+    print(json.dumps(payload))
+
+
+if __name__ == "__main__":
+    main()
--- a/.github/scripts/librispeech/ASR/run.sh
+++ b/.github/scripts/librispeech/ASR/run.sh
@ -1,11 +1,12 @@
 #!/usr/bin/env bash
+
 set -ex

-cd /icefall
-export PYTHONPATH=/icefall:$PYTHONPATH
-python3 -c "import torch; print(torch.__file__)"
-python3 -c "import torchaudio; print(torchaudio.__version__)"
-python3 -c "import icefall; print(icefall.__file__)"
+# Print a message prefixed with a timestamp and the caller's location.
+# Output format: "YYYY-mm-dd HH:MM:SS (file:line:function) message".
+# All arguments are joined into the message; `echo -e` interprets
+# backslash escapes in it.
+log() {
+  # This function is from espnet
+  # Basename of the source file of the caller (everything up to the last "/" removed).
+  local fname=${BASH_SOURCE[1]##*/}
+  # BASH_LINENO[0] is the line where log was called; FUNCNAME[1] is the calling function.
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}

 cd egs/librispeech/ASR

--- a/.github/scripts/yesno/ASR/run.sh
+++ b/.github/scripts/yesno/ASR/run.sh
@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# Print a message prefixed with a timestamp and the caller's location.
+# Output format: "YYYY-mm-dd HH:MM:SS (file:line:function) message".
+# All arguments are joined into the message; `echo -e` interprets
+# backslash escapes in it.
+log() {
+  # This function is from espnet
+  # Basename of the source file of the caller (everything up to the last "/" removed).
+  local fname=${BASH_SOURCE[1]##*/}
+  # BASH_LINENO[0] is the line where log was called; FUNCNAME[1] is the calling function.
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+# All relative paths below are resolved from the yesno recipe directory.
+# The steps run strictly in order; `set -ex` above aborts on the first failure.
+cd egs/yesno/ASR
+
+log "data preparation"
+./prepare.sh
+
+log "training"
+python3 ./tdnn/train.py
+
+log "decoding"
+python3 ./tdnn/decode.py
+
+log "export to pretrained.pt"
+
+# NOTE(review): --epoch 14 --avg 2 presumably requires train.py to have
+# saved checkpoints up to epoch 14 - confirm against tdnn/train.py defaults.
+python3 ./tdnn/export.py --epoch 14 --avg 2
+
+python3 ./tdnn/pretrained.py \
+  --checkpoint ./tdnn/exp/pretrained.pt \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test exporting to torchscript"
+# Presumably writes ./tdnn/exp/cpu_jit.pt, which the next command loads.
+python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
+
+python3 ./tdnn/jit_pretrained.py \
+  --nn-model ./tdnn/exp/cpu_jit.pt \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test exporting to onnx"
+# Presumably produces both the float32 and the int8 .onnx files used by the
+# two tests below - verify against tdnn/export_onnx.py.
+python3 ./tdnn/export_onnx.py --epoch 14 --avg 2
+
+log "Test float32 model"
+python3 ./tdnn/onnx_pretrained.py \
+  --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test int8 model"
+python3 ./tdnn/onnx_pretrained.py \
+  --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
+  --HLG ./data/lang_phone/HLG.pt \
+  --words-file ./data/lang_phone/words.txt \
+  download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+  download/waves_yesno/0_0_1_0_0_0_1_0.wav
+
+log "Test decoding with H"
+# Same export command as in the torchscript test above, repeated here.
+python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
+
+python3 ./tdnn/jit_pretrained_decode_with_H.py \
+    --nn-model ./tdnn/exp/cpu_jit.pt \
+    --H ./data/lang_phone/H.fst \
+    --tokens ./data/lang_phone/tokens.txt \
+    ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+    ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
+    ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
+
+log "Test decoding with HL"
+# Same export command as in the torchscript test above, repeated here.
+python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
+
+python3 ./tdnn/jit_pretrained_decode_with_HL.py \
+    --nn-model ./tdnn/exp/cpu_jit.pt \
+    --HL ./data/lang_phone/HL.fst \
+    --words ./data/lang_phone/words.txt \
+    ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
+    ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
+    ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
+
+log "Show generated files"
+ls -lh tdnn/exp
+ls -lh data/lang_phone
--- a/.github/workflows/build-cpu-docker.yml
+++ b/.github/workflows/build-cpu-docker.yml
@ -7,18 +7,31 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  # Runs generate_build_matrix.py and exposes its JSON as the `matrix` job
+  # output, which the dependent job feeds to `fromJson` for strategy.matrix.
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          # The `::set-output` workflow command is deprecated; step outputs
+          # are written to the $GITHUB_OUTPUT file instead. The script prints
+          # single-line JSON, so the plain name=value form is sufficient.
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  build-cpu-docker:
+    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
-        torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
-        k2-version: ["1.24.4.dev20231220"]
-        kaldifeat-version: ["1.25.3.dev20231221"]
-        version: ["1.1"]
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      # refer to https://github.com/actions/checkout
@ -45,25 +58,14 @@ jobs:
        run: |
          cd .github/scripts/docker
          torch_version=${{ matrix.torch-version }}
+          torchaudio_version=${{ matrix.torchaudio-version }}

-          # see https://pytorch.org/audio/stable/installation.html#compatibility-matrix
-          if [[ $torch_version == 1.13.0 ]]; then
-            torchaudio_version=0.13.0
-          elif [[ $torch_version == 1.13.1 ]]; then
-            torchaudio_version=0.13.1
-          elif [[ $torch_version == 2.0.0 ]]; then
-            torchaudio_version=2.0.1
-          elif [[ $torch_version == 2.0.1 ]]; then
-            torchaudio_version=2.0.2
-          else
-            torchaudio_version=$torch_version
-          fi
          echo "torch_version: $torch_version"
          echo "torchaudio_version: $torchaudio_version"

          version=${{ matrix.version }}

-          tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
+          tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
          echo "tag: $tag"

          docker build \
--- a/.github/workflows/run-yesno-recipe.yml
+++ b/.github/workflows/run-yesno-recipe.yml
@ -20,166 +20,60 @@ on:
  push:
    branches:
      - master
+      - refactor-ci
+
  pull_request:
    branches:
      - master

+  workflow_dispatch:
+
 concurrency:
  group: run-yesno-recipe-${{ github.ref }}
  cancel-in-progress: true

 jobs:
+  # Runs generate_build_matrix.py and exposes its JSON as the `matrix` job
+  # output, which the dependent job feeds to `fromJson` for strategy.matrix.
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          # The `::set-output` workflow command is deprecated; step outputs
+          # are written to the $GITHUB_OUTPUT file instead. The script prints
+          # single-line JSON, so the plain name=value form is sufficient.
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  run-yesno-recipe:
-    runs-on: ${{ matrix.os }}
+    needs: generate_build_matrix
+    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
+    runs-on: ubuntu-latest
    strategy:
-      matrix:
-        # os: [ubuntu-latest, macos-10.15]
-        # TODO: enable macOS for CPU testing
-        os: [ubuntu-latest]
-        python-version: [3.8]
      fail-fast: false
+      matrix:
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+      - name: Run the yesno recipe
+        uses: addnab/docker-run-action@v3
        with:
-          python-version: ${{ matrix.python-version }}
-          cache: 'pip'
-          cache-dependency-path: '**/requirements-ci.txt'
+            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+            options: |
+              --volume ${{ github.workspace }}/:/icefall
+            shell: bash
+            run: |
+              export PYTHONPATH=/icefall:$PYTHONPATH
+              cd /icefall
+              git config --global --add safe.directory /icefall

-      - name: Install libnsdfile and libsox
-        if: startsWith(matrix.os, 'ubuntu')
-        run: |
-          sudo apt update
-          sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
-          sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
-
-      - name: Install Python dependencies
-        run: |
-          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
-          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf==3.20.*
-
-          pip install --no-deps --force-reinstall k2==1.24.4.dev20231021+cpu.torch1.13.1 -f https://k2-fsa.github.io/k2/cpu.html
-          pip install kaldifeat==1.25.1.dev20231022+cpu.torch1.13.1 -f https://csukuangfj.github.io/kaldifeat/cpu.html
-
-      - name: Run yesno recipe
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          export PYTHONPATH=$PWD:$PYTHONPATH
-          echo $PYTHONPATH
-
-          cd egs/yesno/ASR
-          ./prepare.sh
-          python3 ./tdnn/train.py
-          python3 ./tdnn/decode.py
-
-      - name: Test exporting to pretrained.pt
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          export PYTHONPATH=$PWD:$PYTHONPATH
-          echo $PYTHONPATH
-
-          cd egs/yesno/ASR
-          python3 ./tdnn/export.py --epoch 14 --avg 2
-
-          python3 ./tdnn/pretrained.py \
-            --checkpoint ./tdnn/exp/pretrained.pt \
-            --HLG ./data/lang_phone/HLG.pt \
-            --words-file ./data/lang_phone/words.txt \
-            download/waves_yesno/0_0_0_1_0_0_0_1.wav \
-            download/waves_yesno/0_0_1_0_0_0_1_0.wav
-
-      - name: Test exporting to torchscript
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          export PYTHONPATH=$PWD:$PYTHONPATH
-          echo $PYTHONPATH
-
-          cd egs/yesno/ASR
-          python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
-
-          python3 ./tdnn/jit_pretrained.py \
-            --nn-model ./tdnn/exp/cpu_jit.pt \
-            --HLG ./data/lang_phone/HLG.pt \
-            --words-file ./data/lang_phone/words.txt \
-            download/waves_yesno/0_0_0_1_0_0_0_1.wav \
-            download/waves_yesno/0_0_1_0_0_0_1_0.wav
-
-      - name: Test exporting to onnx
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          export PYTHONPATH=$PWD:$PYTHONPATH
-          echo $PYTHONPATH
-
-          cd egs/yesno/ASR
-          python3 ./tdnn/export_onnx.py --epoch 14 --avg 2
-
-          echo "Test float32 model"
-          python3 ./tdnn/onnx_pretrained.py \
-            --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
-            --HLG ./data/lang_phone/HLG.pt \
-            --words-file ./data/lang_phone/words.txt \
-            download/waves_yesno/0_0_0_1_0_0_0_1.wav \
-            download/waves_yesno/0_0_1_0_0_0_1_0.wav
-
-
-          echo "Test int8 model"
-          python3 ./tdnn/onnx_pretrained.py \
-            --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
-            --HLG ./data/lang_phone/HLG.pt \
-            --words-file ./data/lang_phone/words.txt \
-            download/waves_yesno/0_0_0_1_0_0_0_1.wav \
-            download/waves_yesno/0_0_1_0_0_0_1_0.wav
-
-      - name: Test decoding with H
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          export PYTHONPATH=$PWD:$PYTHONPATH
-          echo $PYTHONPATH
-
-          cd egs/yesno/ASR
-          python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
-
-          python3 ./tdnn/jit_pretrained_decode_with_H.py \
-              --nn-model ./tdnn/exp/cpu_jit.pt \
-              --H ./data/lang_phone/H.fst \
-              --tokens ./data/lang_phone/tokens.txt \
-              ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
-              ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
-              ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
-
-      - name: Test decoding with HL
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          export PYTHONPATH=$PWD:$PYTHONPATH
-          echo $PYTHONPATH
-
-          cd egs/yesno/ASR
-          python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
-
-          python3 ./tdnn/jit_pretrained_decode_with_HL.py \
-              --nn-model ./tdnn/exp/cpu_jit.pt \
-              --HL ./data/lang_phone/HL.fst \
-              --words ./data/lang_phone/words.txt \
-              ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
-              ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
-              ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
-
-      - name: Show generated files
-        shell: bash
-        working-directory: ${{github.workspace}}
-        run: |
-          cd egs/yesno/ASR
-          ls -lh tdnn/exp
-          ls -lh data/lang_phone
+              .github/scripts/yesno/ASR/run.sh
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -16,16 +16,31 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  # Runs generate_build_matrix.py and exposes its JSON as the `matrix` job
+  # output, which the dependent job feeds to `fromJson` for strategy.matrix.
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          # The `::set-output` workflow command is deprecated; step outputs
+          # are written to the $GITHUB_OUTPUT file instead. The script prints
+          # single-line JSON, so the plain name=value form is sufficient.
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  test:
+    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
-        torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
-        version: ["1.1"]
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4
@ -44,7 +59,7 @@ jobs:
      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
-            image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
--- a/.github/workflows/train-librispeech.yml
+++ b/.github/workflows/train-librispeech.yml
@ -15,16 +15,31 @@ concurrency:
  cancel-in-progress: true

 jobs:
+  # Runs generate_build_matrix.py and exposes its JSON as the `matrix` job
+  # output, which the dependent job feeds to `fromJson` for strategy.matrix.
+  generate_build_matrix:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          # The `::set-output` workflow command is deprecated; step outputs
+          # are written to the $GITHUB_OUTPUT file instead. The script prints
+          # single-line JSON, so the plain name=value form is sufficient.
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  train-librispeech:
+    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-latest]
-        python-version: ["3.8", "3.9", "3.10"]
-        torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
-        version: ["1.1"]
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      # refer to https://github.com/actions/checkout
@ -44,11 +59,13 @@ jobs:
      - name: Test zipformer/train.py with LibriSpeech
        uses: addnab/docker-run-action@v3
        with:
-            image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
-              ls -lh /icefall
+              export PYTHONPATH=/icefall:$PYTHONPATH
+              cd /icefall
+              git config --global --add safe.directory /icefall

-              /icefall/.github/scripts/docker/run.sh
+              .github/scripts/librispeech/ASR/run.sh