mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-12-09 05:55:26 +00:00
Add CI test to cover zipformer/train.py (#1424)
This commit is contained in:
parent
702d4f5914
commit
79a42148db
59
.github/scripts/docker/Dockerfile
vendored
Normal file
59
.github/scripts/docker/Dockerfile
vendored
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# CPU-only image used by the icefall CI workflows.
#
# PYTHON_VERSION is declared before FROM so that it can select the tag of the
# base image.
ARG PYTHON_VERSION=3.8
FROM python:${PYTHON_VERSION}

# Versions of the main dependencies. Override them with --build-arg.
ARG TORCHAUDIO_VERSION="0.13.0"
ARG TORCH_VERSION="1.13.0"
ARG K2_VERSION="1.24.4.dev20231220"
ARG KALDIFEAT_VERSION="1.25.3.dev20231221"

# Full version strings of the CPU wheels: <version>+cpu.torch<torch version>.
ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}"
ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}"

# System packages. The apt caches are removed in the same layer to keep the
# image small.
RUN apt-get update -y && \
    apt-get install -qq -y \
      ffmpeg \
      git \
      git-lfs \
      less \
      vim \
      && \
    apt-get clean && \
    rm -rf /var/cache/apt/archives /var/lib/apt/lists


LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${_K2_VERSION}
LABEL kaldifeat_version=${_KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"

# Install dependencies
#
# NOTE: "sentencepiece>=0.1.96" must stay quoted. RUN uses the shell form, and
# an unquoted ">=" is parsed by the shell as an output redirection to a file
# named "=0.1.96", which silently drops the version constraint.
RUN pip install --no-cache-dir \
      torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
      k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      six \
      multi_quantization \
      numpy \
      pytest \
      graphviz

# RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
# cd /workspace/icefall && \
# pip install --no-cache-dir -r requirements.txt
#
# ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
#
# WORKDIR /workspace/icefall
|
||||||
60
.github/scripts/docker/run.sh
vendored
Executable file
60
.github/scripts/docker/run.sh
vendored
Executable file
@ -0,0 +1,60 @@
|
|||||||
|
#!/usr/bin/env bash
#
# CI smoke test for zipformer/train.py.
#
# Expects the icefall repository to be available at /icefall. It downloads the
# LibriSpeech text resources and a LibriSpeech archive prepared for CI,
# computes fbank features, runs stages 5-6 of prepare.sh and finally trains a
# very small zipformer model for one epoch.

# -e: abort on the first failing command; -x: echo commands to the CI log.
set -ex

cd /icefall
export PYTHONPATH=/icefall:$PYTHONPATH

# Print where the key packages are imported from, to ease debugging.
python3 -c "import torch; print(torch.__file__)"
python3 -c "import torchaudio; print(torchaudio.__version__)"
python3 -c "import icefall; print(icefall.__file__)"

cd egs/librispeech/ASR

# We don't download the LM file since it is so large that it will
# cause OOM error for CI later.
mkdir -p download/lm
pushd download/lm
wget -q http://www.openslr.org/resources/11/librispeech-vocab.txt
wget -q http://www.openslr.org/resources/11/librispeech-lexicon.txt
wget -q http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz
ls -lh
gunzip librispeech-lm-norm.txt.gz

ls -lh
popd

pushd download/
wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2
tar xf LibriSpeech.tar.bz2
rm LibriSpeech.tar.bz2

cd LibriSpeech
# Expose train-clean-100 under the names of the other two training subsets.
# NOTE(review): presumably so that code expecting all three training
# directories finds them; confirm against prepare.sh.
ln -s train-clean-100 train-clean-360
# The link must point to train-clean-100; linking train-other-500 to itself
# creates a symlink loop that cannot be resolved.
ln -s train-clean-100 train-other-500
popd

mkdir -p data/manifests

lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests
ls -lh data/manifests

./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False
ls -lh data/fbank

./prepare.sh --stage 5 --stop-stage 6

# Train a deliberately tiny model (one layer per encoder stack, small
# dimensions) for a single epoch on CPU.
./zipformer/train.py \
  --world-size 1 \
  --num-epochs 1 \
  --start-epoch 1 \
  --use-fp16 0 \
  --exp-dir zipformer/exp-small \
  --causal 0 \
  --num-encoder-layers 1,1,1,1,1,1 \
  --feedforward-dim 64,96,96,96,96,96 \
  --encoder-dim 32,64,64,64,64,64 \
  --encoder-unmasked-dim 32,32,32,32,32,32 \
  --base-lr 0.04 \
  --full-libri 0 \
  --enable-musan 0 \
  --max-duration 30 \
  --print-diagnostics 1
|
||||||
75
.github/workflows/build-cpu-docker.yml
vendored
Normal file
75
.github/workflows/build-cpu-docker.yml
vendored
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# Builds the CPU docker images defined in .github/scripts/docker/Dockerfile
# and pushes them to ghcr.io. Triggered manually only.
name: build-cpu-docker

on:
  workflow_dispatch:

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: build-cpu-docker-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-cpu-docker:
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
        # Versions are quoted so that YAML keeps them as strings
        # (an unquoted 3.10 would be read as the number 3.1).
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
        torch-version:
          - "1.13.0"
          - "2.0.0"
          - "2.0.1"
          - "2.1.0"
          - "2.1.1"
          - "2.1.2"
        k2-version:
          - "1.24.4.dev20231220"
        kaldifeat-version:
          - "1.25.3.dev20231221"
        version:
          - "1.0"

    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h

      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build docker Image
        shell: bash
        run: |
          cd .github/scripts/docker

          # Map the torch version to its matching torchaudio version; versions
          # not listed explicitly use the same number for both packages.
          torch_version=${{ matrix.torch-version }}
          case $torch_version in
            1.13.0) torchaudio_version=0.13.0 ;;
            2.0.0) torchaudio_version=2.0.1 ;;
            2.0.1) torchaudio_version=2.0.2 ;;
            *) torchaudio_version=$torch_version ;;
          esac
          echo "torch_version: $torch_version"
          echo "torchaudio_version: $torchaudio_version"

          version=${{ matrix.version }}

          tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
          echo "tag: $tag"

          docker build \
            -t $tag \
            --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
            --build-arg TORCH_VERSION=$torch_version \
            --build-arg TORCHAUDIO_VERSION=$torchaudio_version \
            --build-arg K2_VERSION=${{ matrix.k2-version }} \
            --build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \
            .

          docker image ls
          docker push $tag
|
||||||
56
.github/workflows/train-librispeech.yml
vendored
Normal file
56
.github/workflows/train-librispeech.yml
vendored
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
# Runs .github/scripts/docker/run.sh inside the prebuilt CPU docker image for
# every combination of Python and torch versions in the matrix.
name: train librispeech

on:
  push:
    branches: [master]

  pull_request:
    branches: [master]

  workflow_dispatch:

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: train-librispeech-${{ github.ref }}
  cancel-in-progress: true

jobs:
  train-librispeech:
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
        # Versions are quoted so that YAML keeps them as strings
        # (an unquoted 3.10 would be read as the number 3.1).
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
        torch-version:
          - "1.13.0"
          - "2.0.0"
          - "2.0.1"
          - "2.1.0"
          - "2.1.1"
          - "2.1.2"
        k2-version:
          - "1.24.4.dev20231220"
        kaldifeat-version:
          - "1.25.3.dev20231221"
        version:
          - "1.0"

    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"

      - name: Run the build process with Docker
        uses: addnab/docker-run-action@v3
        with:
          # The image tag must match the one pushed by build-cpu-docker.
          image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
          # Mount the checked-out repository at /icefall, the path run.sh uses.
          options: |
            --volume ${{ github.workspace }}/:/icefall
          shell: bash
          run: |
            ls -lh /icefall

            /icefall/.github/scripts/docker/run.sh
|
||||||
1
egs/gigaspeech/ASR/zipformer/my_profile.py
Symbolic link
1
egs/gigaspeech/ASR/zipformer/my_profile.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../librispeech/ASR/zipformer/my_profile.py
|
||||||
@ -1 +0,0 @@
|
|||||||
../../../librispeech/ASR/zipformer/profile.py
|
|
||||||
@ -17,7 +17,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Usage: ./pruned_transducer_stateless/profile.py
|
Usage: ./pruned_transducer_stateless/my_profile.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@ -17,7 +17,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Usage: ./pruned_transducer_stateless4/profile.py
|
Usage: ./pruned_transducer_stateless4/my_profile.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@ -17,7 +17,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Usage: ./pruned_transducer_stateless7/profile.py
|
Usage: ./pruned_transducer_stateless7/my_profile.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
@ -17,7 +17,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Usage: ./zipformer/profile.py
|
Usage: ./zipformer/my_profile.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
1
egs/tedlium3/ASR/zipformer/my_profile.py
Symbolic link
1
egs/tedlium3/ASR/zipformer/my_profile.py
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../../librispeech/ASR/zipformer/my_profile.py
|
||||||
@ -1 +0,0 @@
|
|||||||
../../../librispeech/ASR/zipformer/profile.py
|
|
||||||
Loading…
x
Reference in New Issue
Block a user