mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-26 18:24:18 +00:00
Merge branch 'master' into cr-ctc
This commit is contained in:
commit
6bba97514e
15
.github/scripts/docker/generate_build_matrix.py
vendored
15
.github/scripts/docker/generate_build_matrix.py
vendored
@ -45,15 +45,17 @@ def get_torchaudio_version(torch_version):
|
||||
def get_matrix():
|
||||
k2_version = "1.24.4.dev20240223"
|
||||
kaldifeat_version = "1.25.4.dev20240223"
|
||||
version = "20240725"
|
||||
version = "20240905"
|
||||
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
torch_version = []
|
||||
torch_version += ["1.13.0", "1.13.1"]
|
||||
torch_version += ["2.0.0", "2.0.1"]
|
||||
torch_version += ["2.1.0", "2.1.1", "2.1.2"]
|
||||
torch_version += ["2.2.0", "2.2.1", "2.2.2"]
|
||||
# torch_version += ["1.13.0", "1.13.1"]
|
||||
# torch_version += ["2.0.0", "2.0.1"]
|
||||
# torch_version += ["2.1.0", "2.1.1", "2.1.2"]
|
||||
# torch_version += ["2.2.0", "2.2.1", "2.2.2"]
|
||||
# Test only torch >= 2.3.0
|
||||
torch_version += ["2.3.0", "2.3.1"]
|
||||
torch_version += ["2.4.0"]
|
||||
torch_version += ["2.4.1"]
|
||||
|
||||
matrix = []
|
||||
for p in python_version:
|
||||
@ -82,6 +84,9 @@ def get_matrix():
|
||||
elif t == "2.4.0":
|
||||
k2_version_2 = "1.24.4.dev20240725"
|
||||
kaldifeat_version_2 = "1.25.4.dev20240725"
|
||||
elif t == "2.4.1":
|
||||
k2_version_2 = "1.24.4.dev20240905"
|
||||
kaldifeat_version_2 = "1.25.4.dev20240905"
|
||||
|
||||
matrix.append(
|
||||
{
|
||||
|
42
.github/scripts/multi-zh-hans.sh
vendored
42
.github/scripts/multi-zh-hans.sh
vendored
@ -16,6 +16,48 @@ log "pwd: $PWD"
|
||||
|
||||
cd egs/multi_zh-hans/ASR
|
||||
|
||||
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
|
||||
log "Downloading pre-trained model from $repo_url"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||
repo=$(basename $repo_url)
|
||||
pushd $repo
|
||||
cd exp
|
||||
git lfs pull --include pretrained.pt
|
||||
ln -s pretrained.pt epoch-99.pt
|
||||
cd ../data/lang_bpe_2000
|
||||
ls -lh
|
||||
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
|
||||
git lfs pull --include "*.model"
|
||||
ls -lh
|
||||
popd
|
||||
|
||||
log "--------------------------------------------"
|
||||
log "Export non-streaming ONNX transducer models "
|
||||
log "--------------------------------------------"
|
||||
./zipformer/export-onnx.py \
|
||||
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
||||
--use-averaged-model 0 \
|
||||
--epoch 99 \
|
||||
--avg 1 \
|
||||
--exp-dir $repo/exp \
|
||||
--causal False
|
||||
|
||||
ls -lh $repo/exp
|
||||
|
||||
./zipformer/onnx_pretrained.py \
|
||||
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
||||
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
||||
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
||||
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
||||
$repo/test_wavs/DEV_T0000000000.wav \
|
||||
$repo/test_wavs/DEV_T0000000001.wav \
|
||||
$repo/test_wavs/DEV_T0000000002.wav \
|
||||
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
|
||||
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
|
||||
$repo/test_wavs/TEST_MEETING_T0000000351.wav
|
||||
|
||||
rm -rf $repo
|
||||
|
||||
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
|
||||
log "Downloading pre-trained model from $repo_url"
|
||||
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
||||
|
@ -29,8 +29,8 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
|
||||
ls -lh data/fbank
|
||||
ls -lh pruned_transducer_stateless2/exp
|
||||
|
||||
ln -s data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz
|
||||
ln -s data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz
|
||||
ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz
|
||||
ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz
|
||||
|
||||
log "Decoding dev and test"
|
||||
|
||||
|
1
.github/scripts/test-onnx-export.sh
vendored
1
.github/scripts/test-onnx-export.sh
vendored
@ -25,6 +25,7 @@ popd
|
||||
|
||||
log "Export via torch.jit.script()"
|
||||
./zipformer/export.py \
|
||||
--use-averaged-model 0 \
|
||||
--exp-dir $repo/exp \
|
||||
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
||||
--epoch 99 \
|
||||
|
2
.github/workflows/build-doc.yml
vendored
2
.github/workflows/build-doc.yml
vendored
@ -26,6 +26,8 @@ on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: build_doc-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
2
.github/workflows/build-docker-image.yml
vendored
2
.github/workflows/build-docker-image.yml
vendored
@ -16,7 +16,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
image: ["torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
||||
image: ["torch2.4.1-cuda12.4", "torch2.4.1-cuda12.1", "torch2.4.1-cuda11.8", "torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
||||
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
|
@ -33,6 +33,8 @@ on:
|
||||
# nightly build at 15:50 UTC time every day
|
||||
- cron: "50 15 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run_gigaspeech_2022_05_13-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@ -119,7 +121,7 @@ jobs:
|
||||
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
|
||||
|
||||
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
|
||||
|
@ -42,7 +42,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
run_gigaspeech_2023_10_17_zipformer:
|
||||
if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||
if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
@ -133,7 +133,7 @@ jobs:
|
||||
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||
|
||||
- name: Upload decoding results for gigaspeech zipformer
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
|
||||
|
@ -16,6 +16,8 @@ on:
|
||||
# nightly build at 15:50 UTC time every day
|
||||
- cron: "50 15 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@ -156,7 +158,7 @@ jobs:
|
||||
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
||||
|
||||
- name: Upload decoding results for lstm_transducer_stateless2
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR'
|
||||
with:
|
||||
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
|
||||
|
@ -23,6 +23,8 @@ on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run_multi-corpora_zipformer-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
4
.github/workflows/run-ptb-rnn-lm.yml
vendored
4
.github/workflows/run-ptb-rnn-lm.yml
vendored
@ -16,6 +16,8 @@ on:
|
||||
# nightly build at 15:50 UTC time every day
|
||||
- cron: "50 15 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run_ptb_rnn_lm_training-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@ -64,7 +66,7 @@ jobs:
|
||||
./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2
|
||||
|
||||
- name: Upload pretrained models
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
|
||||
with:
|
||||
name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
|
||||
|
2
.github/workflows/run-swbd-conformer-ctc.yml
vendored
2
.github/workflows/run-swbd-conformer-ctc.yml
vendored
@ -23,6 +23,8 @@ on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run-swbd-conformer_ctc-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
@ -23,6 +23,8 @@ on:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
2
.github/workflows/style_check.yml
vendored
2
.github/workflows/style_check.yml
vendored
@ -24,6 +24,8 @@ on:
|
||||
branches:
|
||||
- master
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: style_check-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
2
.github/workflows/test-ncnn-export.yml
vendored
2
.github/workflows/test-ncnn-export.yml
vendored
@ -16,6 +16,8 @@ on:
|
||||
# nightly build at 15:50 UTC time every day
|
||||
- cron: "50 15 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: test_ncnn_export-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
2
.github/workflows/test-onnx-export.yml
vendored
2
.github/workflows/test-onnx-export.yml
vendored
@ -16,6 +16,8 @@ on:
|
||||
# nightly build at 15:50 UTC time every day
|
||||
- cron: "50 15 * * *"
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: test_onnx_export-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
@ -105,7 +105,7 @@ jobs:
|
||||
cd ../zipformer
|
||||
pytest -v -s
|
||||
|
||||
- uses: actions/upload-artifact@v2
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
path: egs/librispeech/ASR/zipformer/swoosh.pdf
|
||||
name: swoosh.pdf
|
||||
name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }}
|
||||
|
73
docker/torch2.4.1-cuda11.8.dockerfile
Normal file
73
docker/torch2.4.1-cuda11.8.dockerfile
Normal file
@ -0,0 +1,73 @@
|
||||
FROM pytorch/pytorch:2.4.1-cuda11.8-cudnn9-devel
|
||||
# python 3.10
|
||||
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.10
|
||||
ARG K2_VERSION="1.24.4.dev20240905+cuda11.8.torch2.4.1"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda11.8.torch2.4.1"
|
||||
ARG TORCHAUDIO_VERSION="2.4.1+cu118"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
LABEL k2_version=${K2_VERSION}
|
||||
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
|
||||
LABEL github_repo="https://github.com/k2-fsa/icefall"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
vim \
|
||||
libssl-dev \
|
||||
autoconf \
|
||||
automake \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
ffmpeg \
|
||||
g++ \
|
||||
gfortran \
|
||||
git \
|
||||
libtool \
|
||||
make \
|
||||
patch \
|
||||
sox \
|
||||
subversion \
|
||||
unzip \
|
||||
valgrind \
|
||||
wget \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
|
||||
k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
|
||||
git+https://github.com/lhotse-speech/lhotse \
|
||||
kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
|
||||
kaldi_native_io \
|
||||
kaldialign \
|
||||
kaldifst \
|
||||
kaldilm \
|
||||
sentencepiece>=0.1.96 \
|
||||
tensorboard \
|
||||
typeguard \
|
||||
dill \
|
||||
onnx \
|
||||
onnxruntime \
|
||||
onnxmltools \
|
||||
onnxoptimizer \
|
||||
onnxsim \
|
||||
multi_quantization \
|
||||
typeguard \
|
||||
numpy \
|
||||
pytest \
|
||||
graphviz
|
||||
|
||||
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
||||
cd /workspace/icefall && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
||||
|
||||
WORKDIR /workspace/icefall
|
73
docker/torch2.4.1-cuda12.1.dockerfile
Normal file
73
docker/torch2.4.1-cuda12.1.dockerfile
Normal file
@ -0,0 +1,73 @@
|
||||
FROM pytorch/pytorch:2.4.1-cuda12.1-cudnn9-devel
|
||||
# python 3.10
|
||||
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.10
|
||||
ARG K2_VERSION="1.24.4.dev20240905+cuda12.1.torch2.4.1"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.1.torch2.4.1"
|
||||
ARG TORCHAUDIO_VERSION="2.4.1+cu121"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
LABEL k2_version=${K2_VERSION}
|
||||
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
|
||||
LABEL github_repo="https://github.com/k2-fsa/icefall"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
vim \
|
||||
libssl-dev \
|
||||
autoconf \
|
||||
automake \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
ffmpeg \
|
||||
g++ \
|
||||
gfortran \
|
||||
git \
|
||||
libtool \
|
||||
make \
|
||||
patch \
|
||||
sox \
|
||||
subversion \
|
||||
unzip \
|
||||
valgrind \
|
||||
wget \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
|
||||
k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
|
||||
git+https://github.com/lhotse-speech/lhotse \
|
||||
kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
|
||||
kaldi_native_io \
|
||||
kaldialign \
|
||||
kaldifst \
|
||||
kaldilm \
|
||||
sentencepiece>=0.1.96 \
|
||||
tensorboard \
|
||||
typeguard \
|
||||
dill \
|
||||
onnx \
|
||||
onnxruntime \
|
||||
onnxmltools \
|
||||
onnxoptimizer \
|
||||
onnxsim \
|
||||
multi_quantization \
|
||||
typeguard \
|
||||
numpy \
|
||||
pytest \
|
||||
graphviz
|
||||
|
||||
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
||||
cd /workspace/icefall && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
||||
|
||||
WORKDIR /workspace/icefall
|
73
docker/torch2.4.1-cuda12.4.dockerfile
Normal file
73
docker/torch2.4.1-cuda12.4.dockerfile
Normal file
@ -0,0 +1,73 @@
|
||||
FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-devel
|
||||
# python 3.10
|
||||
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.10
|
||||
ARG K2_VERSION="1.24.4.dev20240905+cuda12.4.torch2.4.1"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.4.torch2.4.1"
|
||||
ARG TORCHAUDIO_VERSION="2.4.1+cu124"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
LABEL k2_version=${K2_VERSION}
|
||||
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
|
||||
LABEL github_repo="https://github.com/k2-fsa/icefall"
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
vim \
|
||||
libssl-dev \
|
||||
autoconf \
|
||||
automake \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
ffmpeg \
|
||||
g++ \
|
||||
gfortran \
|
||||
git \
|
||||
libtool \
|
||||
make \
|
||||
patch \
|
||||
sox \
|
||||
subversion \
|
||||
unzip \
|
||||
valgrind \
|
||||
wget \
|
||||
zlib1g-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install dependencies
|
||||
RUN pip install --no-cache-dir \
|
||||
torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
|
||||
k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
|
||||
git+https://github.com/lhotse-speech/lhotse \
|
||||
kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
|
||||
kaldi_native_io \
|
||||
kaldialign \
|
||||
kaldifst \
|
||||
kaldilm \
|
||||
sentencepiece>=0.1.96 \
|
||||
tensorboard \
|
||||
typeguard \
|
||||
dill \
|
||||
onnx \
|
||||
onnxruntime \
|
||||
onnxmltools \
|
||||
onnxoptimizer \
|
||||
onnxsim \
|
||||
multi_quantization \
|
||||
typeguard \
|
||||
numpy \
|
||||
pytest \
|
||||
graphviz
|
||||
|
||||
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
||||
cd /workspace/icefall && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
||||
|
||||
WORKDIR /workspace/icefall
|
@ -34,6 +34,12 @@ which will give you something like below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
"torch2.4.1-cuda12.4"
|
||||
"torch2.4.1-cuda12.1"
|
||||
"torch2.4.1-cuda11.8"
|
||||
"torch2.4.0-cuda12.4"
|
||||
"torch2.4.0-cuda12.1"
|
||||
"torch2.4.0-cuda11.8"
|
||||
"torch2.3.1-cuda12.1"
|
||||
"torch2.3.1-cuda11.8"
|
||||
"torch2.2.2-cuda12.1"
|
||||
|
@ -87,7 +87,7 @@ fi
|
||||
if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
|
||||
log "Stage 3: Prepare musan manifest"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
# to data/musan
|
||||
# to $dl_dir/musan
|
||||
if [ ! -f data/manifests/.musan_manifests.done ]; then
|
||||
log "It may take 6 minutes"
|
||||
mkdir -p data/manifests
|
||||
|
@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then
|
||||
# for train, we use smaller context and larger batches to speed-up processing
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 5.0 \
|
||||
--use-garbage-class \
|
||||
@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then
|
||||
for part in eval test; do
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \
|
||||
$EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 15.0 \
|
||||
|
@ -65,7 +65,7 @@ fi
|
||||
if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
|
||||
log "Stage 2: Prepare musan manifest"
|
||||
# We assume that you have downloaded the musan corpus
|
||||
# to data/musan
|
||||
# to $dl_dir/musan
|
||||
mkdir -p data/manifests
|
||||
lhotse prepare musan $dl_dir/musan data/manifests
|
||||
fi
|
||||
|
@ -82,7 +82,7 @@ class AlimeetingAsrDataModule:
|
||||
group.add_argument(
|
||||
"--manifest-dir",
|
||||
type=Path,
|
||||
default=Path("data/manifests"),
|
||||
default=Path("data/fbank"),
|
||||
help="Path to directory with train/valid/test cuts.",
|
||||
)
|
||||
group.add_argument(
|
||||
@ -327,9 +327,11 @@ class AlimeetingAsrDataModule:
|
||||
def test_dataloaders(self, cuts: CutSet) -> DataLoader:
|
||||
logging.debug("About to create test dataset")
|
||||
test = K2SpeechRecognitionDataset(
|
||||
input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
|
||||
if self.args.on_the_fly_feats
|
||||
else PrecomputedFeatures(),
|
||||
input_strategy=(
|
||||
OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
|
||||
if self.args.on_the_fly_feats
|
||||
else PrecomputedFeatures()
|
||||
),
|
||||
return_cuts=True,
|
||||
)
|
||||
sampler = DynamicBucketingSampler(
|
||||
|
@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then
|
||||
# for train, we use smaller context and larger batches to speed-up processing
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 5.0 \
|
||||
--use-garbage-class \
|
||||
@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then
|
||||
for part in dev test; do
|
||||
for JOB in $(seq $nj); do
|
||||
gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \
|
||||
$EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \
|
||||
$EXP_DIR/enhanced \
|
||||
--bss-iterations 10 \
|
||||
--context-duration 15.0 \
|
||||
|
@ -74,7 +74,6 @@ import onnx
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from decoder import Decoder
|
||||
from onnxconverter_common import float16
|
||||
from onnxruntime.quantization import QuantType, quantize_dynamic
|
||||
from scaling_converter import convert_scaled_to_non_scaled
|
||||
from train import add_model_arguments, get_model, get_params
|
||||
@ -756,6 +755,7 @@ def main():
|
||||
logging.info(f"Exported joiner to {joiner_filename}")
|
||||
|
||||
if(params.fp16) :
|
||||
from onnxconverter_common import float16
|
||||
logging.info("Generate fp16 models")
|
||||
|
||||
encoder = onnx.load(encoder_filename)
|
||||
|
@ -191,6 +191,7 @@ class Zipformer2(EncoderInterface):
|
||||
dim=encoder_dim[i],
|
||||
downsample=downsampling_factor[i],
|
||||
dropout=dropout,
|
||||
causal=causal,
|
||||
)
|
||||
|
||||
encoders.append(encoder)
|
||||
@ -198,7 +199,10 @@ class Zipformer2(EncoderInterface):
|
||||
self.encoders = nn.ModuleList(encoders)
|
||||
|
||||
self.downsample_output = SimpleDownsample(
|
||||
max(encoder_dim), downsample=output_downsampling_factor, dropout=dropout
|
||||
max(encoder_dim),
|
||||
downsample=output_downsampling_factor,
|
||||
dropout=dropout,
|
||||
causal=causal,
|
||||
)
|
||||
|
||||
def get_feature_masks(self, x: Tensor) -> Union[List[float], List[Tensor]]:
|
||||
@ -1217,11 +1221,16 @@ class DownsampledZipformer2Encoder(nn.Module):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, encoder: nn.Module, dim: int, downsample: int, dropout: FloatLike
|
||||
self,
|
||||
encoder: nn.Module,
|
||||
dim: int,
|
||||
downsample: int,
|
||||
dropout: FloatLike,
|
||||
causal: bool,
|
||||
):
|
||||
super(DownsampledZipformer2Encoder, self).__init__()
|
||||
self.downsample_factor = downsample
|
||||
self.downsample = SimpleDownsample(dim, downsample, dropout)
|
||||
self.downsample = SimpleDownsample(dim, downsample, dropout, causal)
|
||||
self.num_layers = encoder.num_layers
|
||||
self.encoder = encoder
|
||||
self.upsample = SimpleUpsample(dim, downsample)
|
||||
@ -1310,9 +1319,12 @@ class SimpleDownsample(torch.nn.Module):
|
||||
Does downsampling with attention, by weighted sum, and a projection..
|
||||
"""
|
||||
|
||||
def __init__(self, channels: int, downsample: int, dropout: FloatLike):
|
||||
def __init__(
|
||||
self, channels: int, downsample: int, dropout: FloatLike, causal: bool
|
||||
):
|
||||
super(SimpleDownsample, self).__init__()
|
||||
|
||||
self.causal = causal
|
||||
self.bias = nn.Parameter(torch.zeros(downsample))
|
||||
|
||||
self.name = None # will be set from training code
|
||||
@ -1333,9 +1345,18 @@ class SimpleDownsample(torch.nn.Module):
|
||||
# Pad to an exact multiple of self.downsample
|
||||
# right-pad src, repeating the last element.
|
||||
pad = d_seq_len * ds - seq_len
|
||||
src_extra = src[src.shape[0] - 1 :].expand(pad, src.shape[1], src.shape[2])
|
||||
src = torch.cat((src, src_extra), dim=0)
|
||||
assert src.shape[0] == d_seq_len * ds
|
||||
|
||||
if self.causal and torch.jit.is_tracing():
|
||||
assert (
|
||||
pad == 0
|
||||
), f"pad should be zero for exporting streaming models. Given {pad}"
|
||||
|
||||
# If we are exporting a streaming model, then we skip the if statement
|
||||
if not self.causal or not torch.jit.is_tracing():
|
||||
src_extra = src[src.shape[0] - 1 :].expand(pad, src.shape[1], src.shape[2])
|
||||
src = torch.cat((src, src_extra), dim=0)
|
||||
|
||||
assert src.shape[0] == d_seq_len * ds, (src.shape, d_seq_len, ds)
|
||||
|
||||
src = src.reshape(d_seq_len, ds, batch_size, in_channels)
|
||||
|
||||
@ -1609,7 +1630,11 @@ class RelPositionMultiheadAttentionWeights(nn.Module):
|
||||
k = x[..., query_dim : 2 * query_dim]
|
||||
# p is the position-encoding query
|
||||
p = x[..., 2 * query_dim :]
|
||||
assert p.shape[-1] == num_heads * pos_head_dim, (p.shape[-1], num_heads, pos_head_dim)
|
||||
assert p.shape[-1] == num_heads * pos_head_dim, (
|
||||
p.shape[-1],
|
||||
num_heads,
|
||||
pos_head_dim,
|
||||
)
|
||||
|
||||
q = self.copy_query(q) # for diagnostics only, does nothing.
|
||||
k = self.whiten_keys(self.balance_keys(k)) # does nothing in the forward pass.
|
||||
|
@ -63,8 +63,8 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
|
||||
ln -svf $(realpath ./open-commands/CN/small/commands.txt) commands_small.txt
|
||||
ln -svf $(realpath ./open-commands/CN/large/commands.txt) commands_large.txt
|
||||
pushd open-commands
|
||||
./script/prepare.sh --stage 1 --stop-stage 1
|
||||
./script/prepare.sh --stage 3 --stop-stage 5
|
||||
./scripts/prepare.sh --stage 1 --stop-stage 1
|
||||
./scripts/prepare.sh --stage 3 --stop-stage 5
|
||||
popd
|
||||
popd
|
||||
pushd data/fbank
|
||||
|
Loading…
x
Reference in New Issue
Block a user