From 5f5699c5dd5b87ee60d6bb3d4e00d084d97c3999 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 22 Dec 2023 22:27:53 +0800 Subject: [PATCH 1/5] first working version --- .github/scripts/docker/Dockerfile | 22 +++++++++------- .github/workflows/build-cpu-docker.yml | 34 ++++++++++++++++++++++--- .github/workflows/train-librispeech.yml | 16 +++++++++--- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index 03e598466..47af9983b 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -1,7 +1,15 @@ ARG PYTHON_VERSION=3.8 - FROM python:${PYTHON_VERSION} +ARG TORCHAUDIO_VERSION="0.13.0" +ARG TORCH_VERSION="1.13.0" +ARG K2_VERSION="1.24.4.dev20231220" +ARG KALDIFEAT_VERSION="1.25.3.dev20231221" + +ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}" +ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}" + + RUN apt-get update -y && \ apt-get install -qq -y \ git \ @@ -12,22 +20,18 @@ RUN apt-get update -y && \ apt-get clean && \ rm -rf /var/cache/apt/archives /var/lib/apt/lists -ARG K2_VERSION="1.24.4.dev20231220+cpu.torch1.13.0" -ARG KALDIFEAT_VERSION="1.25.3.dev20231221+cpu.torch1.13.0" -ARG TORCHAUDIO_VERSION="0.13.0" -ARG TORCH_VERSION="1.13.0" LABEL authors="Fangjun Kuang " -LABEL k2_version=${K2_VERSION} -LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL k2_version=${_K2_VERSION} +LABEL kaldifeat_version=${_KALDIFEAT_VERSION} LABEL github_repo="https://github.com/k2-fsa/icefall" # Install dependencies RUN pip install --no-cache-dir \ torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \ - k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \ + k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \ git+https://github.com/lhotse-speech/lhotse \ - kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \ + kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \ kaldi_native_io \ kaldialign \ kaldifst \ diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index 67af26397..bf1125779 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -2,7 +2,7 @@ name: build-cpu-docker on: push: branches: - - ci-train-2 + - ci-train-3 workflow_dispatch: concurrency: @@ -11,13 +11,16 @@ concurrency: jobs: build-cpu-docker: - name: ${{ matrix.python-version }} + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] python-version: ["3.8", "3.9", "3.10"] + torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + k2-version: ["1.24.4.dev20231220"] + kaldifeat-version: ["1.25.3.dev20231221"] steps: # refer to https://github.com/actions/checkout @@ -43,7 +46,30 @@ jobs: shell: bash run: | cd .github/scripts/docker + torch_version=${{ matrix.torch-version }} + if [[ $torch_version == 1.13.0 ]]; then + torchaudio_version=0.13.0 + elif [[ $torch_version == 2.0.0 ]]; then + torchaudio_version=2.0.1 + elif [[ $torch_version == 2.0.1 ]]; then + torchaudio_version=2.0.2 + else + torchaudio_version=$torch_version + fi + echo "torch_version: $torch_version" + echo "torchaudio_version: $torchaudio_version" + + tag=ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }} + echo "tag: $tag" + + docker build \ + -t $tag \ + --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ + --build-arg TORCH_VERSION=$torch_version \ + --build-arg TORCHAUDIO_VERSION=$torchaudio_version \ + --build-arg K2_VERSION=${{ matrix.k2-version }} \ + --build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \ + . - docker build -t ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }} --build-arg PYTHON_VERSION=${{ matrix.python-version }} . docker image ls - docker push ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }} + docker push $tag diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 1e7ee8bc3..0a76e0cd7 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -2,7 +2,13 @@ name: train librispeech on: push: branches: - - ci-train + - master + - ci-train-2 + + pull_request: + branches: + - master + workflow_dispatch: concurrency: @@ -11,13 +17,16 @@ concurrency: jobs: train-librispeech: - name: ${{ matrix.python-version }} + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] python-version: ["3.8", "3.9", "3.10"] + torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + k2-version: ["1.24.4.dev20231220"] + kaldifeat-version: ["1.25.3.dev20231221"] steps: # refer to https://github.com/actions/checkout @@ -37,7 +46,8 @@ jobs: - name: Run the build process with Docker uses: addnab/docker-run-action@v3 with: - image: ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }} + # image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }} + image: ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }} options: | --volume ${{ github.workspace }}/:/icefall shell: bash From 0979a3eb0f64ef76b13d753324a52e687e5af88e Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 22 Dec 2023 23:36:54 +0800 Subject: [PATCH 2/5] install ffmpeg for torch >= 2.1.0 --- .github/scripts/docker/Dockerfile | 1 + .github/workflows/build-cpu-docker.yml | 2 +- .github/workflows/train-librispeech.yml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index 47af9983b..6f57c1ccb 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -12,6 +12,7 @@ ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}" RUN apt-get update -y && \ apt-get install -qq -y \ + ffmpeg \ git \ git-lfs \ less \ diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index bf1125779..c89655552 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -2,7 +2,7 @@ name: build-cpu-docker on: push: branches: - - ci-train-3 + - ci-train-2 workflow_dispatch: concurrency: diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 0a76e0cd7..e9850bf84 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -3,7 +3,7 @@ on: push: branches: - master - - ci-train-2 + - ci-train-3 pull_request: branches: From eb34ed556066db02788d12dbd2db19f7f81f7afa Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 22 Dec 2023 23:41:36 +0800 Subject: [PATCH 3/5] fix torchaudio --- .github/scripts/docker/run.sh | 3 +++ .github/workflows/build-cpu-docker.yml | 2 +- .github/workflows/train-librispeech.yml | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/scripts/docker/run.sh b/.github/scripts/docker/run.sh index 2154cdf07..aeb80b330 100755 --- a/.github/scripts/docker/run.sh +++ b/.github/scripts/docker/run.sh @@ -3,6 +3,9 @@ set -ex cd /icefall export PYTHONPATH=/icefall:$PYTHONPATH +python3 -c "import torch; print(torch.__file__)" +python3 -c "import torchaudio; print(torchaudio.__version__)" +python3 -c "import icefall; print(icefall.__file__)" cd egs/librispeech/ASR diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index c89655552..bf1125779 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -2,7 +2,7 @@ name: build-cpu-docker on: push: branches: - - ci-train-2 + - ci-train-3 workflow_dispatch: concurrency: diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index e9850bf84..0a76e0cd7 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -3,7 +3,7 @@ on: push: branches: - master - - ci-train-3 + - ci-train-2 pull_request: branches: From 360c30bb59d2546bbf8207996043effa9f45ddb0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 23 Dec 2023 00:18:05 +0800 Subject: [PATCH 4/5] Rename profile to my_profile --- egs/gigaspeech/ASR/zipformer/my_profile.py | 1 + egs/gigaspeech/ASR/zipformer/profile.py | 1 - .../pruned_transducer_stateless/{profile.py => my_profile.py} | 2 +- .../pruned_transducer_stateless4/{profile.py => my_profile.py} | 2 +- .../pruned_transducer_stateless7/{profile.py => my_profile.py} | 2 +- egs/librispeech/ASR/zipformer/{profile.py => my_profile.py} | 2 +- egs/tedlium3/ASR/zipformer/my_profile.py | 1 + egs/tedlium3/ASR/zipformer/profile.py | 1 - 8 files changed, 6 insertions(+), 6 deletions(-) create mode 120000 egs/gigaspeech/ASR/zipformer/my_profile.py delete mode 120000 egs/gigaspeech/ASR/zipformer/profile.py rename egs/librispeech/ASR/pruned_transducer_stateless/{profile.py => my_profile.py} (98%) rename egs/librispeech/ASR/pruned_transducer_stateless4/{profile.py => my_profile.py} (98%) rename egs/librispeech/ASR/pruned_transducer_stateless7/{profile.py => my_profile.py} (98%) rename egs/librispeech/ASR/zipformer/{profile.py => my_profile.py} (99%) create mode 120000 egs/tedlium3/ASR/zipformer/my_profile.py delete mode 120000 egs/tedlium3/ASR/zipformer/profile.py diff --git a/egs/gigaspeech/ASR/zipformer/my_profile.py b/egs/gigaspeech/ASR/zipformer/my_profile.py new file mode 120000 index 000000000..3a90b2628 --- /dev/null +++ b/egs/gigaspeech/ASR/zipformer/my_profile.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/my_profile.py \ No newline at end of file diff --git a/egs/gigaspeech/ASR/zipformer/profile.py b/egs/gigaspeech/ASR/zipformer/profile.py deleted file mode 120000 index c93adbd14..000000000 --- a/egs/gigaspeech/ASR/zipformer/profile.py +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/ASR/zipformer/profile.py \ No newline at end of file diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/profile.py b/egs/librispeech/ASR/pruned_transducer_stateless/my_profile.py similarity index 98% rename from egs/librispeech/ASR/pruned_transducer_stateless/profile.py rename to egs/librispeech/ASR/pruned_transducer_stateless/my_profile.py index 09e4a7af4..b844ba613 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/profile.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./pruned_transducer_stateless/profile.py +Usage: ./pruned_transducer_stateless/my_profile.py """ import argparse diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/profile.py b/egs/librispeech/ASR/pruned_transducer_stateless4/my_profile.py similarity index 98% rename from egs/librispeech/ASR/pruned_transducer_stateless4/profile.py rename to egs/librispeech/ASR/pruned_transducer_stateless4/my_profile.py index 252bdf060..4bf773918 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless4/profile.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless4/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./pruned_transducer_stateless4/profile.py +Usage: ./pruned_transducer_stateless4/my_profile.py """ import argparse diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/profile.py b/egs/librispeech/ASR/pruned_transducer_stateless7/my_profile.py similarity index 98% rename from egs/librispeech/ASR/pruned_transducer_stateless7/profile.py rename to egs/librispeech/ASR/pruned_transducer_stateless7/my_profile.py index 0d308e966..5a068b3b6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/profile.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./pruned_transducer_stateless7/profile.py +Usage: ./pruned_transducer_stateless7/my_profile.py """ import argparse diff --git a/egs/librispeech/ASR/zipformer/profile.py b/egs/librispeech/ASR/zipformer/my_profile.py similarity index 99% rename from egs/librispeech/ASR/zipformer/profile.py rename to egs/librispeech/ASR/zipformer/my_profile.py index 57f44a90a..ca20956fb 100755 --- a/egs/librispeech/ASR/zipformer/profile.py +++ b/egs/librispeech/ASR/zipformer/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./zipformer/profile.py +Usage: ./zipformer/my_profile.py """ import argparse diff --git a/egs/tedlium3/ASR/zipformer/my_profile.py b/egs/tedlium3/ASR/zipformer/my_profile.py new file mode 120000 index 000000000..3a90b2628 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/my_profile.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/my_profile.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/profile.py b/egs/tedlium3/ASR/zipformer/profile.py deleted file mode 120000 index c93adbd14..000000000 --- a/egs/tedlium3/ASR/zipformer/profile.py +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/ASR/zipformer/profile.py \ No newline at end of file From fd09e5befe81303149354d898062b8deac6bbad0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 23 Dec 2023 00:24:20 +0800 Subject: [PATCH 5/5] minor fixes --- .github/workflows/build-cpu-docker.yml | 10 +++++----- .github/workflows/train-librispeech.yml | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index bf1125779..f931f7d09 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -1,8 +1,5 @@ name: build-cpu-docker on: - push: - branches: - - ci-train-3 workflow_dispatch: concurrency: @@ -11,7 +8,7 @@ concurrency: jobs: build-cpu-docker: - name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -21,6 +18,7 @@ jobs: torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] k2-version: ["1.24.4.dev20231220"] kaldifeat-version: ["1.25.3.dev20231221"] + version: ["1.0"] steps: # refer to https://github.com/actions/checkout @@ -59,7 +57,9 @@ jobs: echo "torch_version: $torch_version" echo "torchaudio_version: $torchaudio_version" - tag=ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }} + version=${{ matrix.version }} + + tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version echo "tag: $tag" docker build \ diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 0a76e0cd7..7c9a28f03 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -3,7 +3,6 @@ on: push: branches: - master - - ci-train-2 pull_request: branches: @@ -17,7 +16,7 @@ concurrency: jobs: train-librispeech: - name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -27,6 +26,7 @@ jobs: torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] k2-version: ["1.24.4.dev20231220"] kaldifeat-version: ["1.25.3.dev20231221"] + version: ["1.0"] steps: # refer to https://github.com/actions/checkout @@ -46,8 +46,7 @@ jobs: - name: Run the build process with Docker uses: addnab/docker-run-action@v3 with: - # image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }} - image: ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }} + image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} options: | --volume ${{ github.workspace }}/:/icefall shell: bash