mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-04 06:34:20 +00:00
use a separate script to download data and generate features
This commit is contained in:
parent
4fa40f5dcd
commit
ed286895bc
58
.github/scripts/docker/Dockerfile
vendored
58
.github/scripts/docker/Dockerfile
vendored
@ -1,7 +1,55 @@
|
||||
FROM k2fsa/icefall:torch1.13.0-cuda11.6
|
||||
ARG PYTHON_VERSION=3.8
|
||||
|
||||
WORKDIR /workspace/icefall
|
||||
FROM python:${PYTHON_VERSION}
|
||||
|
||||
RUN cd egs/librispeech/ASR && \
|
||||
./prepare.sh --stop-stage 1 && \
|
||||
ls -lh download data
|
||||
# Install the command-line tools used inside the CI container.
# `update` and `install` share one layer so the package index is never stale,
# and the apt caches are removed in the same layer so they do not end up in
# the image. --no-install-recommends keeps the image to exactly the packages
# listed here (plus their hard dependencies).
RUN apt-get update -y && \
    apt-get install -qq -y --no-install-recommends \
      git \
      git-lfs \
      less \
      vim \
    && \
    apt-get clean && \
    rm -rf /var/cache/apt/archives /var/lib/apt/lists
|
||||
|
||||
# Versions of the CPU-only wheels installed by the `pip install` step.
# Each can be overridden at build time with `--build-arg NAME=value`.
ARG TORCH_VERSION="1.13.0"
ARG TORCHAUDIO_VERSION="0.13.0"
ARG K2_VERSION="1.24.4.dev20231220+cpu.torch1.13.0"
ARG KALDIFEAT_VERSION="1.25.3.dev20231221+cpu.torch1.13.0"

# Image metadata; the k2 / kaldifeat versions are recorded so that a pulled
# image can be inspected for what it was built with.
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>" \
      k2_version="${K2_VERSION}" \
      kaldifeat_version="${KALDIFEAT_VERSION}" \
      github_repo="https://github.com/k2-fsa/icefall"
|
||||
|
||||
# Install dependencies
#
# torch/torchaudio, k2 and kaldifeat are pinned through the *_VERSION build
# args and resolved against the extra wheel indexes given with `-f`.
#
# The sentencepiece requirement MUST stay quoted: this RUN is executed by
# /bin/sh, and an unquoted `sentencepiece>=0.1.96` is parsed as
# `sentencepiece` with stdout redirected to a file named `=0.1.96`, which
# silently drops the version constraint.
RUN pip install --no-cache-dir \
      torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      six \
      multi_quantization \
      numpy \
      pytest \
      graphviz
|
||||
|
||||
# RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
||||
# cd /workspace/icefall && \
|
||||
# pip install --no-cache-dir -r requirements.txt
|
||||
#
|
||||
# ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
||||
#
|
||||
# WORKDIR /workspace/icefall
|
||||
|
57
.github/scripts/docker/run.sh
vendored
Executable file
57
.github/scripts/docker/run.sh
vendored
Executable file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env bash
#
# CI smoke test for the LibriSpeech ASR recipe.
#
# Steps:
#   1. download the LibriSpeech vocabulary, lexicon and LM training text
#   2. download and unpack the reduced LibriSpeech archive used for CI
#   3. build lhotse manifests and compute fbank features
#   4. run stages 5-6 of prepare.sh
#   5. train a very small zipformer model for one epoch
#
# The icefall checkout is expected at /icefall.

# -e: stop at the first failing command; -x: echo each command into the CI log.
set -ex

cd /icefall
export PYTHONPATH=/icefall:$PYTHONPATH

cd egs/librispeech/ASR

# We don't download the LM file since it is so large that it will
# cause OOM error for CI later.
mkdir -p download/lm
pushd download/lm
wget -q http://www.openslr.org/resources/11/librispeech-vocab.txt
wget -q http://www.openslr.org/resources/11/librispeech-lexicon.txt
wget -q http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz
ls -lh
gunzip librispeech-lm-norm.txt.gz

ls -lh
popd

pushd download/
wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2
tar xf LibriSpeech.tar.bz2
# Remove the archive right after extraction to save disk space on the runner.
rm LibriSpeech.tar.bz2

cd LibriSpeech
# Alias the larger training subsets to train-clean-100 so that the expected
# directory names exist.
ln -s train-clean-100 train-clean-360
# The previous `ln -s train-other-500 train-other-500` created a symlink that
# pointed to itself. NOTE(review): presumably train-other-500 should alias
# train-clean-100 like train-clean-360 above -- confirm. The guard leaves a
# real train-other-500 directory untouched if the archive ships one.
[ -e train-other-500 ] || ln -s train-clean-100 train-other-500
popd

mkdir -p data/manifests

# Only the dev/test sets and train-clean-100 are prepared.
lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests
ls -lh data/manifests

# Speed perturbation is disabled for the feature computation.
./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False
ls -lh data/fbank

./prepare.sh --stage 5 --stop-stage 6

# One epoch on a single worker with one layer per encoder stack and small
# dimensions: enough to exercise the training loop end to end.
./zipformer/train.py \
  --world-size 1 \
  --num-epochs 1 \
  --start-epoch 1 \
  --use-fp16 0 \
  --exp-dir zipformer/exp-small \
  --causal 0 \
  --num-encoder-layers 1,1,1,1,1,1 \
  --feedforward-dim 64,96,96,96,96,96 \
  --encoder-dim 32,64,64,64,64,64 \
  --encoder-unmasked-dim 32,32,32,32,32,32 \
  --base-lr 0.04 \
  --full-libri 0 \
  --enable-musan 0 \
  --max-duration 30 \
  --print-diagnostics 1
|
49
.github/workflows/build-cpu-docker.yml
vendored
Normal file
49
.github/workflows/build-cpu-docker.yml
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
# Builds the CPU-only icefall Docker image once per Python version in the
# matrix and pushes each image to the GitHub Container Registry.
name: build-cpu-docker

on:
  push:
    branches:
      - ci-train-2
  workflow_dispatch:

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: build-cpu-docker-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-cpu-docker:
    name: ${{ matrix.python-version }}
    runs-on: ${{ matrix.os }}
    # The job pushes to ghcr.io with GITHUB_TOKEN, which needs
    # `packages: write`. Declaring it here keeps the push working when the
    # default token permissions are read-only, and grants nothing beyond it.
    permissions:
      contents: read
      packages: write
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8", "3.9", "3.10"]

    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Delete the runner's tool cache to make room for the image build.
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h

      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # The image tag encodes the Python version, e.g. cpu-py3.8.
      - name: Build docker Image
        shell: bash
        run: |
          cd .github/scripts/docker

          docker build -t ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }} --build-arg PYTHON_VERSION=${{ matrix.python-version }} .
          docker image ls
          docker push ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }}
|
38
.github/workflows/train-librispeech.yml
vendored
38
.github/workflows/train-librispeech.yml
vendored
@ -11,47 +11,37 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
train-librispeech:
|
||||
name: ${{ matrix.image }}
|
||||
name: ${{ matrix.python-version }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: ["3.8", "3.9", "3.10"]
|
||||
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Rename
|
||||
shell: bash
|
||||
run: |
|
||||
cp -v .github/scripts/docker/Dockerfile ./Dockerfile
|
||||
|
||||
- name: Free space
|
||||
shell: bash
|
||||
run: |
|
||||
df -h
|
||||
rm -rf /opt/hostedtoolcache
|
||||
df -h
|
||||
echo "pwd: $PWD"
|
||||
echo "github.workspace ${{ github.workspace }}"
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
- name: Run the build process with Docker
|
||||
uses: addnab/docker-run-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
image: ghcr.io/csukuangfj/icefall:cpu-py${{ matrix.python-version }}
|
||||
options: |
|
||||
--volume ${{ github.workspace }}/:/icefall
|
||||
shell: bash
|
||||
run: |
|
||||
ls -lh /icefall
|
||||
|
||||
- name: 'Login to GitHub Container Registry'
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build docker Image
|
||||
shell: bash
|
||||
run: |
|
||||
cp -v .github/scripts/docker/Dockerfile ./Dockerfile
|
||||
docker build -t ghcr.io/k2-fsa/icefall:librispeech .
|
||||
docker image ls
|
||||
/icefall/.github/scripts/docker/run.sh
|
||||
|
Loading…
x
Reference in New Issue
Block a user