From 702d4f59147a81ef7ba37c7863ae7bd258c743a9 Mon Sep 17 00:00:00 2001 From: TianHao Zhang <32243340+Zth9730@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:42:33 +0800 Subject: [PATCH 01/15] Update prepare.sh (#1422) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix the bug in line 251: 1、 del the additional blank 2、correct the spell error of "new_vocab_size" --- egs/libriheavy/ASR/prepare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/libriheavy/ASR/prepare.sh b/egs/libriheavy/ASR/prepare.sh index af7e3c5b0..b0736c98b 100755 --- a/egs/libriheavy/ASR/prepare.sh +++ b/egs/libriheavy/ASR/prepare.sh @@ -248,7 +248,7 @@ if [ $stage -le 10 ] && [ $stop_stage -ge 10 ]; then | jq '.supervisions[].text' | sed 's/"//;s/\\//g;s/"$//' > data/punc_texts fi for vocab_size in ${vocab_sizes[@]}; do - new_vacab_size = $(($vocab_size + 256)) + new_vocab_size=$(($vocab_size + 256)) lang_dir=data/lang_punc_bpe_${new_vocab_size} mkdir -p $lang_dir From 79a42148dbcd98c42586f8386d91f6f4bb8f9979 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 23 Dec 2023 00:38:36 +0800 Subject: [PATCH 02/15] Add CI test to cover zipformer/train.py (#1424) --- .github/scripts/docker/Dockerfile | 59 +++++++++++++++ .github/scripts/docker/run.sh | 60 +++++++++++++++ .github/workflows/build-cpu-docker.yml | 75 +++++++++++++++++++ .github/workflows/train-librispeech.yml | 56 ++++++++++++++ egs/gigaspeech/ASR/zipformer/my_profile.py | 1 + egs/gigaspeech/ASR/zipformer/profile.py | 1 - .../{profile.py => my_profile.py} | 2 +- .../{profile.py => my_profile.py} | 2 +- .../{profile.py => my_profile.py} | 2 +- .../zipformer/{profile.py => my_profile.py} | 2 +- egs/tedlium3/ASR/zipformer/my_profile.py | 1 + egs/tedlium3/ASR/zipformer/profile.py | 1 - 12 files changed, 256 insertions(+), 6 deletions(-) create mode 100644 .github/scripts/docker/Dockerfile create mode 100755 .github/scripts/docker/run.sh create mode 100644 
.github/workflows/build-cpu-docker.yml create mode 100644 .github/workflows/train-librispeech.yml create mode 120000 egs/gigaspeech/ASR/zipformer/my_profile.py delete mode 120000 egs/gigaspeech/ASR/zipformer/profile.py rename egs/librispeech/ASR/pruned_transducer_stateless/{profile.py => my_profile.py} (98%) rename egs/librispeech/ASR/pruned_transducer_stateless4/{profile.py => my_profile.py} (98%) rename egs/librispeech/ASR/pruned_transducer_stateless7/{profile.py => my_profile.py} (98%) rename egs/librispeech/ASR/zipformer/{profile.py => my_profile.py} (99%) create mode 120000 egs/tedlium3/ASR/zipformer/my_profile.py delete mode 120000 egs/tedlium3/ASR/zipformer/profile.py diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile new file mode 100644 index 000000000..55c3aa1b9 --- /dev/null +++ b/.github/scripts/docker/Dockerfile @@ -0,0 +1,59 @@ +ARG PYTHON_VERSION=3.8 +FROM python:${PYTHON_VERSION} + +ARG TORCHAUDIO_VERSION="0.13.0" +ARG TORCH_VERSION="1.13.0" +ARG K2_VERSION="1.24.4.dev20231220" +ARG KALDIFEAT_VERSION="1.25.3.dev20231221" + +ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}" +ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}" + +RUN apt-get update -y && \ + apt-get install -qq -y \ + ffmpeg \ + git \ + git-lfs \ + less \ + vim \ + && \ + apt-get clean && \ + rm -rf /var/cache/apt/archives /var/lib/apt/lists + + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${_K2_VERSION} +LABEL kaldifeat_version=${_KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +# Install dependencies +RUN pip install --no-cache-dir \ + torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \ + k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \ + kaldi_native_io \ + kaldialign \ + kaldifst 
\ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + six \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz + +# RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ +# cd /workspace/icefall && \ +# pip install --no-cache-dir -r requirements.txt +# +# ENV PYTHONPATH /workspace/icefall:$PYTHONPATH +# +# WORKDIR /workspace/icefall diff --git a/.github/scripts/docker/run.sh b/.github/scripts/docker/run.sh new file mode 100755 index 000000000..aeb80b330 --- /dev/null +++ b/.github/scripts/docker/run.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -ex + +cd /icefall +export PYTHONPATH=/icefall:$PYTHONPATH +python3 -c "import torch; print(torch.__file__)" +python3 -c "import torchaudio; print(torchaudio.__version__)" +python3 -c "import icefall; print(icefall.__file__)" + +cd egs/librispeech/ASR + +# We don't download the LM file since it is so large that it will +# cause OOM error for CI later. 
+mkdir -p download/lm +pushd download/lm +wget -q http://www.openslr.org/resources/11/librispeech-vocab.txt +wget -q http://www.openslr.org/resources/11/librispeech-lexicon.txt +wget -q http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz +ls -lh +gunzip librispeech-lm-norm.txt.gz + +ls -lh +popd + +pushd download/ +wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2 +tar xf LibriSpeech.tar.bz2 +rm LibriSpeech.tar.bz2 + +cd LibriSpeech +ln -s train-clean-100 train-clean-360 +ln -s train-other-500 train-other-500 +popd + +mkdir -p data/manifests + +lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests +ls -lh data/manifests + +./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False +ls -lh data/fbank + +./prepare.sh --stage 5 --stop-stage 6 + +./zipformer/train.py \ + --world-size 1 \ + --num-epochs 1 \ + --start-epoch 1 \ + --use-fp16 0 \ + --exp-dir zipformer/exp-small \ + --causal 0 \ + --num-encoder-layers 1,1,1,1,1,1 \ + --feedforward-dim 64,96,96,96,96,96 \ + --encoder-dim 32,64,64,64,64,64 \ + --encoder-unmasked-dim 32,32,32,32,32,32 \ + --base-lr 0.04 \ + --full-libri 0 \ + --enable-musan 0 \ + --max-duration 30 \ + --print-diagnostics 1 diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml new file mode 100644 index 000000000..f931f7d09 --- /dev/null +++ b/.github/workflows/build-cpu-docker.yml @@ -0,0 +1,75 @@ +name: build-cpu-docker +on: + workflow_dispatch: + +concurrency: + group: build-cpu-docker-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-cpu-docker: + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.8", "3.9", "3.10"] + 
torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + k2-version: ["1.24.4.dev20231220"] + kaldifeat-version: ["1.25.3.dev20231221"] + version: ["1.0"] + + steps: + # refer to https://github.com/actions/checkout + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Free space + shell: bash + run: | + df -h + rm -rf /opt/hostedtoolcache + df -h + + - name: 'Login to GitHub Container Registry' + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build docker Image + shell: bash + run: | + cd .github/scripts/docker + torch_version=${{ matrix.torch-version }} + if [[ $torch_version == 1.13.0 ]]; then + torchaudio_version=0.13.0 + elif [[ $torch_version == 2.0.0 ]]; then + torchaudio_version=2.0.1 + elif [[ $torch_version == 2.0.1 ]]; then + torchaudio_version=2.0.2 + else + torchaudio_version=$torch_version + fi + echo "torch_version: $torch_version" + echo "torchaudio_version: $torchaudio_version" + + version=${{ matrix.version }} + + tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version + echo "tag: $tag" + + docker build \ + -t $tag \ + --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ + --build-arg TORCH_VERSION=$torch_version \ + --build-arg TORCHAUDIO_VERSION=$torchaudio_version \ + --build-arg K2_VERSION=${{ matrix.k2-version }} \ + --build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \ + . 
+ + docker image ls + docker push $tag diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml new file mode 100644 index 000000000..7c9a28f03 --- /dev/null +++ b/.github/workflows/train-librispeech.yml @@ -0,0 +1,56 @@ +name: train librispeech +on: + push: + branches: + - master + + pull_request: + branches: + - master + + workflow_dispatch: + +concurrency: + group: train-librispeech-${{ github.ref }} + cancel-in-progress: true + +jobs: + train-librispeech: + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.8", "3.9", "3.10"] + torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + k2-version: ["1.24.4.dev20231220"] + kaldifeat-version: ["1.25.3.dev20231221"] + version: ["1.0"] + + steps: + # refer to https://github.com/actions/checkout + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Free space + shell: bash + run: | + df -h + rm -rf /opt/hostedtoolcache + df -h + echo "pwd: $PWD" + echo "github.workspace ${{ github.workspace }}" + + - name: Run the build process with Docker + uses: addnab/docker-run-action@v3 + with: + image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + ls -lh /icefall + + /icefall/.github/scripts/docker/run.sh diff --git a/egs/gigaspeech/ASR/zipformer/my_profile.py b/egs/gigaspeech/ASR/zipformer/my_profile.py new file mode 120000 index 000000000..3a90b2628 --- /dev/null +++ b/egs/gigaspeech/ASR/zipformer/my_profile.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/my_profile.py \ No newline at end of file diff --git a/egs/gigaspeech/ASR/zipformer/profile.py b/egs/gigaspeech/ASR/zipformer/profile.py deleted file mode 120000 index c93adbd14..000000000 --- 
a/egs/gigaspeech/ASR/zipformer/profile.py +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/ASR/zipformer/profile.py \ No newline at end of file diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/profile.py b/egs/librispeech/ASR/pruned_transducer_stateless/my_profile.py similarity index 98% rename from egs/librispeech/ASR/pruned_transducer_stateless/profile.py rename to egs/librispeech/ASR/pruned_transducer_stateless/my_profile.py index 09e4a7af4..b844ba613 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/profile.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./pruned_transducer_stateless/profile.py +Usage: ./pruned_transducer_stateless/my_profile.py """ import argparse diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/profile.py b/egs/librispeech/ASR/pruned_transducer_stateless4/my_profile.py similarity index 98% rename from egs/librispeech/ASR/pruned_transducer_stateless4/profile.py rename to egs/librispeech/ASR/pruned_transducer_stateless4/my_profile.py index 252bdf060..4bf773918 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless4/profile.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless4/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./pruned_transducer_stateless4/profile.py +Usage: ./pruned_transducer_stateless4/my_profile.py """ import argparse diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/profile.py b/egs/librispeech/ASR/pruned_transducer_stateless7/my_profile.py similarity index 98% rename from egs/librispeech/ASR/pruned_transducer_stateless7/profile.py rename to egs/librispeech/ASR/pruned_transducer_stateless7/my_profile.py index 0d308e966..5a068b3b6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/profile.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. 
""" -Usage: ./pruned_transducer_stateless7/profile.py +Usage: ./pruned_transducer_stateless7/my_profile.py """ import argparse diff --git a/egs/librispeech/ASR/zipformer/profile.py b/egs/librispeech/ASR/zipformer/my_profile.py similarity index 99% rename from egs/librispeech/ASR/zipformer/profile.py rename to egs/librispeech/ASR/zipformer/my_profile.py index 57f44a90a..ca20956fb 100755 --- a/egs/librispeech/ASR/zipformer/profile.py +++ b/egs/librispeech/ASR/zipformer/my_profile.py @@ -17,7 +17,7 @@ # limitations under the License. """ -Usage: ./zipformer/profile.py +Usage: ./zipformer/my_profile.py """ import argparse diff --git a/egs/tedlium3/ASR/zipformer/my_profile.py b/egs/tedlium3/ASR/zipformer/my_profile.py new file mode 120000 index 000000000..3a90b2628 --- /dev/null +++ b/egs/tedlium3/ASR/zipformer/my_profile.py @@ -0,0 +1 @@ +../../../librispeech/ASR/zipformer/my_profile.py \ No newline at end of file diff --git a/egs/tedlium3/ASR/zipformer/profile.py b/egs/tedlium3/ASR/zipformer/profile.py deleted file mode 120000 index c93adbd14..000000000 --- a/egs/tedlium3/ASR/zipformer/profile.py +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/ASR/zipformer/profile.py \ No newline at end of file From e5bb1ae86cc750a51626e9afcb973cc03fa72f86 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 24 Dec 2023 13:40:33 +0800 Subject: [PATCH 03/15] Use the CPU docker in CI to simplify the test code (#1427) --- .github/scripts/docker/Dockerfile | 21 ++-- .github/workflows/build-cpu-docker.yml | 8 +- .github/workflows/test.yml | 139 +++++++++--------------- .github/workflows/train-librispeech.yml | 8 +- 4 files changed, 72 insertions(+), 104 deletions(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index 55c3aa1b9..bbf978d26 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -14,6 +14,7 @@ RUN apt-get update -y && \ ffmpeg \ git \ git-lfs \ + graphviz \ less \ vim \ && \ @@ -32,23 +33,23 @@ RUN 
pip install --no-cache-dir \ k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \ git+https://github.com/lhotse-speech/lhotse \ kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \ + dill \ + graphviz \ kaldi_native_io \ kaldialign \ kaldifst \ kaldilm \ - sentencepiece>=0.1.96 \ - tensorboard \ - typeguard \ - dill \ - onnx \ - onnxruntime \ - onnxmltools \ - six \ + matplotlib \ multi_quantization \ - typeguard \ numpy \ + onnx \ + onnxmltools \ + onnxruntime \ pytest \ - graphviz + sentencepiece>=0.1.96 \ + six \ + tensorboard \ + typeguard # RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ # cd /workspace/icefall && \ diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index f931f7d09..b26cd2095 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -15,10 +15,10 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] k2-version: ["1.24.4.dev20231220"] kaldifeat-version: ["1.25.3.dev20231221"] - version: ["1.0"] + version: ["1.1"] steps: # refer to https://github.com/actions/checkout @@ -45,8 +45,12 @@ jobs: run: | cd .github/scripts/docker torch_version=${{ matrix.torch-version }} + + # see https://pytorch.org/audio/stable/installation.html#compatibility-matrix if [[ $torch_version == 1.13.0 ]]; then torchaudio_version=0.13.0 + elif [[ $torch_version == 1.13.1 ]]; then + torchaudio_version=0.13.1 elif [[ $torch_version == 2.0.0 ]]; then torchaudio_version=2.0.1 elif [[ $torch_version == 2.0.1 ]]; then diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 363556bb7..b3fd6f133 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,129 +1,94 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) 
- -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - name: test on: push: branches: - master + pull_request: branches: - master + workflow_dispatch: + concurrency: group: test-${{ github.ref }} cancel-in-progress: true jobs: test: + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.8"] - torch: ["1.13.0"] - torchaudio: ["0.13.0"] - k2-version: ["1.24.3.dev20230719"] - - fail-fast: false + python-version: ["3.8", "3.9", "3.10"] + torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + version: ["1.1"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - - name: Install libnsdfile and libsox - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo apt update - sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg - sudo apt install -q -y --fix-missing libsox-dev libsox-fmt-all - - - name: Install Python dependencies - run: | - python3 -m pip install --upgrade pip pytest - # numpy 1.20.x does not support python 3.6 - pip install numpy==1.19 - pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html - pip install 
torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html - - pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.github.io/k2/cpu.html - pip install git+https://github.com/lhotse-speech/lhotse - # icefall requirements - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - pip install kaldifst - pip install onnxruntime matplotlib - pip install -r requirements.txt - - - name: Install graphviz - if: startsWith(matrix.os, 'ubuntu') + - name: Free space shell: bash run: | - python3 -m pip install -qq graphviz - sudo apt-get -qq install graphviz + df -h + rm -rf /opt/hostedtoolcache + df -h + echo "pwd: $PWD" + echo "github.workspace ${{ github.workspace }}" - name: Run tests - if: startsWith(matrix.os, 'ubuntu') - run: | - ls -lh - export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH - echo $PYTHONPATH - pytest -v -s ./test - # runt tests for conformer ctc - cd egs/librispeech/ASR/conformer_ctc - pytest -v -s + uses: addnab/docker-run-action@v3 + with: + image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall - cd ../pruned_transducer_stateless - pytest -v -s + pytest -v -s ./test - cd ../pruned_transducer_stateless2 - pytest -v -s + # runt tests for conformer ctc + cd egs/librispeech/ASR/conformer_ctc + pytest -v -s - cd ../pruned_transducer_stateless3 - pytest -v -s + cd ../pruned_transducer_stateless + pytest -v -s - cd ../pruned_transducer_stateless4 - pytest -v -s + cd ../pruned_transducer_stateless2 + pytest -v -s - echo $PYTHONPATH - cd ../pruned_transducer_stateless7 - pytest -v -s + cd ../pruned_transducer_stateless3 + pytest -v -s - cd ../transducer_stateless - pytest -v -s + cd ../pruned_transducer_stateless4 + 
pytest -v -s - # cd ../transducer - # pytest -v -s + echo $PYTHONPATH + cd ../pruned_transducer_stateless7 + pytest -v -s - cd ../transducer_stateless2 - pytest -v -s + cd ../transducer_stateless + pytest -v -s - cd ../transducer_lstm - pytest -v -s + # cd ../transducer + # pytest -v -s - cd ../zipformer - pytest -v -s + cd ../transducer_stateless2 + pytest -v -s + + cd ../transducer_lstm + pytest -v -s + + cd ../zipformer + pytest -v -s - uses: actions/upload-artifact@v2 with: diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 7c9a28f03..53a2d5843 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -23,10 +23,8 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - k2-version: ["1.24.4.dev20231220"] - kaldifeat-version: ["1.25.3.dev20231221"] - version: ["1.0"] + torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + version: ["1.1"] steps: # refer to https://github.com/actions/checkout @@ -43,7 +41,7 @@ jobs: echo "pwd: $PWD" echo "github.workspace ${{ github.workspace }}" - - name: Run the build process with Docker + - name: Test zipformer/train.py with LibriSpeech uses: addnab/docker-run-action@v3 with: image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} From c855a58cfd8628dfe4ef2ffc0ac169d84a8ac0c5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 25 Dec 2023 19:41:09 +0800 Subject: [PATCH 04/15] Generate the dependency matrix by code for GitHub Actions (#1431) --- .github/scripts/docker/Dockerfile | 2 + .../scripts/docker/generate_build_matrix.py | 79 ++++++++ .../{docker => librispeech/ASR}/run.sh | 11 +- .github/scripts/yesno/ASR/run.sh | 86 +++++++++ .github/workflows/build-cpu-docker.yml | 42 ++-- .github/workflows/run-yesno-recipe.yml | 182 ++++-------------- 
.github/workflows/test.yml | 27 ++- .github/workflows/train-librispeech.yml | 33 +++- 8 files changed, 279 insertions(+), 183 deletions(-) create mode 100755 .github/scripts/docker/generate_build_matrix.py rename .github/scripts/{docker => librispeech/ASR}/run.sh (87%) create mode 100755 .github/scripts/yesno/ASR/run.sh diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index bbf978d26..f75d74854 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -31,10 +31,12 @@ LABEL github_repo="https://github.com/k2-fsa/icefall" RUN pip install --no-cache-dir \ torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \ k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \ + \ git+https://github.com/lhotse-speech/lhotse \ kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \ dill \ graphviz \ + kaldi-decoder \ kaldi_native_io \ kaldialign \ kaldifst \ diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py new file mode 100755 index 000000000..4e494d810 --- /dev/null +++ b/.github/scripts/docker/generate_build_matrix.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. 
(authors: Fangjun Kuang) + + +import json + + +def version_gt(a, b): + a_major, a_minor = a.split(".")[:2] + b_major, b_minor = b.split(".")[:2] + if a_major > b_major: + return True + + if a_major == b_major and a_minor > b_minor: + return True + + return False + + +def version_ge(a, b): + a_major, a_minor = a.split(".")[:2] + b_major, b_minor = b.split(".")[:2] + if a_major > b_major: + return True + + if a_major == b_major and a_minor >= b_minor: + return True + + return False + + +def get_torchaudio_version(torch_version): + if torch_version == "1.13.0": + return "0.13.0" + elif torch_version == "1.13.1": + return "0.13.1" + elif torch_version == "2.0.0": + return "2.0.1" + elif torch_version == "2.0.1": + return "2.0.2" + else: + return torch_version + + +def get_matrix(): + k2_version = "1.24.4.dev20231220" + kaldifeat_version = "1.25.3.dev20231221" + version = "1.1" + python_version = ["3.8", "3.9", "3.10", "3.11"] + torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + + matrix = [] + for p in python_version: + for t in torch_version: + # torchaudio <= 1.13.x supports only python <= 3.10 + + if version_gt(p, "3.10") and not version_gt(t, "2.0"): + continue + + matrix.append( + { + "k2-version": k2_version, + "kaldifeat-version": kaldifeat_version, + "version": version, + "python-version": p, + "torch-version": t, + "torchaudio-version": get_torchaudio_version(t), + } + ) + return matrix + + +def main(): + matrix = get_matrix() + print(json.dumps({"include": matrix})) + + +if __name__ == "__main__": + main() diff --git a/.github/scripts/docker/run.sh b/.github/scripts/librispeech/ASR/run.sh similarity index 87% rename from .github/scripts/docker/run.sh rename to .github/scripts/librispeech/ASR/run.sh index aeb80b330..641d59458 100755 --- a/.github/scripts/docker/run.sh +++ b/.github/scripts/librispeech/ASR/run.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash + set -ex -cd /icefall -export PYTHONPATH=/icefall:$PYTHONPATH -python3 -c 
"import torch; print(torch.__file__)" -python3 -c "import torchaudio; print(torchaudio.__version__)" -python3 -c "import icefall; print(icefall.__file__)" +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} cd egs/librispeech/ASR diff --git a/.github/scripts/yesno/ASR/run.sh b/.github/scripts/yesno/ASR/run.sh new file mode 100755 index 000000000..05c8fbac9 --- /dev/null +++ b/.github/scripts/yesno/ASR/run.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +cd egs/yesno/ASR + +log "data preparation" +./prepare.sh + +log "training" +python3 ./tdnn/train.py + +log "decoding" +python3 ./tdnn/decode.py + +log "export to pretrained.pt" + +python3 ./tdnn/export.py --epoch 14 --avg 2 + +python3 ./tdnn/pretrained.py \ + --checkpoint ./tdnn/exp/pretrained.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test exporting to torchscript" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test exporting to onnx" +python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 + +log "Test float32 model" +python3 ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test int8 model" +python3 ./tdnn/onnx_pretrained.py \ + --nn-model 
./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +log "Test decoding with H" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained_decode_with_H.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --H ./data/lang_phone/H.fst \ + --tokens ./data/lang_phone/tokens.txt \ + ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ + ./download/waves_yesno/0_0_1_0_0_1_1_1.wav + +log "Test decoding with HL" +python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 + +python3 ./tdnn/jit_pretrained_decode_with_HL.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HL ./data/lang_phone/HL.fst \ + --words ./data/lang_phone/words.txt \ + ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ + ./download/waves_yesno/0_0_1_0_0_1_1_1.wav + +log "Show generated files" +ls -lh tdnn/exp +ls -lh data/lang_phone diff --git a/.github/workflows/build-cpu-docker.yml b/.github/workflows/build-cpu-docker.yml index b26cd2095..c5d5aaeb6 100644 --- a/.github/workflows/build-cpu-docker.yml +++ b/.github/workflows/build-cpu-docker.yml @@ -7,18 +7,31 @@ concurrency: cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" build-cpu-docker: + needs: generate_build_matrix name: py${{ 
matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - k2-version: ["1.24.4.dev20231220"] - kaldifeat-version: ["1.25.3.dev20231221"] - version: ["1.1"] + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: # refer to https://github.com/actions/checkout @@ -45,25 +58,14 @@ jobs: run: | cd .github/scripts/docker torch_version=${{ matrix.torch-version }} + torchaudio_version=${{ matrix.torchaudio-version }} - # see https://pytorch.org/audio/stable/installation.html#compatibility-matrix - if [[ $torch_version == 1.13.0 ]]; then - torchaudio_version=0.13.0 - elif [[ $torch_version == 1.13.1 ]]; then - torchaudio_version=0.13.1 - elif [[ $torch_version == 2.0.0 ]]; then - torchaudio_version=2.0.1 - elif [[ $torch_version == 2.0.1 ]]; then - torchaudio_version=2.0.2 - else - torchaudio_version=$torch_version - fi echo "torch_version: $torch_version" echo "torchaudio_version: $torchaudio_version" version=${{ matrix.version }} - tag=ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version + tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version echo "tag: $tag" docker build \ diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/run-yesno-recipe.yml index 9ac848535..a99811815 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/run-yesno-recipe.yml @@ -20,166 +20,60 @@ on: push: branches: - master + - refactor-ci + pull_request: branches: - master + workflow_dispatch: + concurrency: group: run-yesno-recipe-${{ github.ref }} cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || 
github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" run-yesno-recipe: - runs-on: ${{ matrix.os }} + needs: generate_build_matrix + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ubuntu-latest strategy: - matrix: - # os: [ubuntu-latest, macos-10.15] - # TODO: enable macOS for CPU testing - os: [ubuntu-latest] - python-version: [3.8] fail-fast: false + matrix: + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - name: Run the yesno recipe + uses: addnab/docker-run-action@v3 with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall - - name: Install libnsdfile and libsox - if: startsWith(matrix.os, 'ubuntu') - run: | - sudo apt update - sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg - sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y 
protobuf - pip install --no-binary protobuf protobuf==3.20.* - - pip install --no-deps --force-reinstall k2==1.24.4.dev20231021+cpu.torch1.13.1 -f https://k2-fsa.github.io/k2/cpu.html - pip install kaldifeat==1.25.1.dev20231022+cpu.torch1.13.1 -f https://csukuangfj.github.io/kaldifeat/cpu.html - - - name: Run yesno recipe - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - ./prepare.sh - python3 ./tdnn/train.py - python3 ./tdnn/decode.py - - - name: Test exporting to pretrained.pt - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 - - python3 ./tdnn/pretrained.py \ - --checkpoint ./tdnn/exp/pretrained.pt \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test exporting to torchscript - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test exporting to onnx - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export_onnx.py --epoch 14 --avg 2 - - echo "Test float32 model" - python3 ./tdnn/onnx_pretrained.py \ - --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - 
download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - echo "Test int8 model" - python3 ./tdnn/onnx_pretrained.py \ - --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ - --HLG ./data/lang_phone/HLG.pt \ - --words-file ./data/lang_phone/words.txt \ - download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - download/waves_yesno/0_0_1_0_0_0_1_0.wav - - - name: Test decoding with H - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained_decode_with_H.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --H ./data/lang_phone/H.fst \ - --tokens ./data/lang_phone/tokens.txt \ - ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ - ./download/waves_yesno/0_0_1_0_0_1_1_1.wav - - - name: Test decoding with HL - shell: bash - working-directory: ${{github.workspace}} - run: | - export PYTHONPATH=$PWD:$PYTHONPATH - echo $PYTHONPATH - - cd egs/yesno/ASR - python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1 - - python3 ./tdnn/jit_pretrained_decode_with_HL.py \ - --nn-model ./tdnn/exp/cpu_jit.pt \ - --HL ./data/lang_phone/HL.fst \ - --words ./data/lang_phone/words.txt \ - ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \ - ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \ - ./download/waves_yesno/0_0_1_0_0_1_1_1.wav - - - name: Show generated files - shell: bash - working-directory: ${{github.workspace}} - run: | - cd egs/yesno/ASR - ls -lh tdnn/exp - ls -lh data/lang_phone + .github/scripts/yesno/ASR/run.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3fd6f133..659681b37 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,16 +16,31 @@ concurrency: cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + 
runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" test: + needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - version: ["1.1"] + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: - uses: actions/checkout@v4 @@ -44,7 +59,7 @@ jobs: - name: Run tests uses: addnab/docker-run-action@v3 with: - image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} options: | --volume ${{ github.workspace }}/:/icefall shell: bash diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/train-librispeech.yml index 53a2d5843..79002a881 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/train-librispeech.yml @@ -15,16 +15,31 @@ concurrency: cancel-in-progress: true jobs: + generate_build_matrix: + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for 
debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" train-librispeech: + needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] - torch-version: ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] - version: ["1.1"] + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} steps: # refer to https://github.com/actions/checkout @@ -44,11 +59,13 @@ jobs: - name: Test zipformer/train.py with LibriSpeech uses: addnab/docker-run-action@v3 with: - image: ghcr.io/k2-fsa/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} options: | --volume ${{ github.workspace }}/:/icefall shell: bash run: | - ls -lh /icefall + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall - /icefall/.github/scripts/docker/run.sh + .github/scripts/librispeech/ASR/run.sh From ddd71313179a1565ea4d9e2e37546c3ef6b98d90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ali=20Haznedaro=C4=9Flu?= <53865510+ahazned@users.noreply.github.com> Date: Mon, 25 Dec 2023 14:44:07 +0300 Subject: [PATCH 05/15] Update TTS export-onnx.py scripts for handling variable token counts (#1430) --- egs/ljspeech/TTS/vits/export-onnx.py | 6 +++++- egs/vctk/TTS/vits/export-onnx.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/egs/ljspeech/TTS/vits/export-onnx.py b/egs/ljspeech/TTS/vits/export-onnx.py index 36a9de27f..f82f9dbe9 100755 --- a/egs/ljspeech/TTS/vits/export-onnx.py +++ 
b/egs/ljspeech/TTS/vits/export-onnx.py @@ -149,6 +149,7 @@ class OnnxModel(nn.Module): def export_model_onnx( model: nn.Module, model_filename: str, + vocab_size: int, opset_version: int = 11, ) -> None: """Export the given generator model to ONNX format. @@ -165,10 +166,12 @@ def export_model_onnx( The VITS generator. model_filename: The filename to save the exported ONNX model. + vocab_size: + Number of tokens used in training. opset_version: The opset version to use. """ - tokens = torch.randint(low=0, high=79, size=(1, 13), dtype=torch.int64) + tokens = torch.randint(low=0, high=vocab_size, size=(1, 13), dtype=torch.int64) tokens_lens = torch.tensor([tokens.shape[1]], dtype=torch.int64) noise_scale = torch.tensor([1], dtype=torch.float32) noise_scale_dur = torch.tensor([1], dtype=torch.float32) @@ -244,6 +247,7 @@ def main(): export_model_onnx( model, model_filename, + params.vocab_size, opset_version=opset_version, ) logging.info(f"Exported generator to {model_filename}") diff --git a/egs/vctk/TTS/vits/export-onnx.py b/egs/vctk/TTS/vits/export-onnx.py index 667ac284b..80d155626 100755 --- a/egs/vctk/TTS/vits/export-onnx.py +++ b/egs/vctk/TTS/vits/export-onnx.py @@ -159,6 +159,7 @@ class OnnxModel(nn.Module): def export_model_onnx( model: nn.Module, model_filename: str, + vocab_size: int, opset_version: int = 11, ) -> None: """Export the given generator model to ONNX format. @@ -175,10 +176,12 @@ def export_model_onnx( The VITS generator. model_filename: The filename to save the exported ONNX model. + vocab_size: + Number of tokens used in training. opset_version: The opset version to use. 
""" - tokens = torch.randint(low=0, high=79, size=(1, 13), dtype=torch.int64) + tokens = torch.randint(low=0, high=vocab_size, size=(1, 13), dtype=torch.int64) tokens_lens = torch.tensor([tokens.shape[1]], dtype=torch.int64) noise_scale = torch.tensor([1], dtype=torch.float32) noise_scale_dur = torch.tensor([1], dtype=torch.float32) @@ -261,6 +264,7 @@ def main(): export_model_onnx( model, model_filename, + params.vocab_size, opset_version=opset_version, ) logging.info(f"Exported generator to {model_filename}") From 835a92eba51a939c6b4a069a53cc1e3ddeabd9a5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 25 Dec 2023 20:23:56 +0800 Subject: [PATCH 06/15] Add doc about how to use the CPU-only docker images (#1432) --- docs/source/docker/intro.rst | 46 ++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/docs/source/docker/intro.rst b/docs/source/docker/intro.rst index 9ead0df00..cbd300d9b 100644 --- a/docs/source/docker/intro.rst +++ b/docs/source/docker/intro.rst @@ -20,7 +20,11 @@ We describe the following items in this section: View available tags =================== -You can use the following command to view available tags: +CUDA-enabled docker images +-------------------------- + +You can use the following command to view available tags for CUDA-enabled +docker images: .. code-block:: bash @@ -43,8 +47,25 @@ which will give you something like below: Please select an appropriate combination of `torch`_ and CUDA. -Download a docker image -======================= +CPU-only docker images +---------------------- + +To view CPU-only docker images, please visit ``_ +for available tags. + +You can select different combinations of ``Python`` and ``torch``. For instance, +to select ``Python 3.8`` and ``torch 2.1.2``, you can use the following tag + +.. code-block:: bash + + cpu-py3.8-torch2.1.2-v1.1 + +where ``v1.1`` is the current version of the docker image. 
You may see +``ghcr.io/k2-fsa/icefall:cpu-py3.8-torch2.1.2-v1.2`` or some other versions. +We recommend that you always use the latest version. + +Download a docker image (CUDA) +============================== Suppose that you select the tag ``torch1.13.0-cuda11.6``, you can use the following command to download it: @@ -53,6 +74,16 @@ the following command to download it: sudo docker image pull k2fsa/icefall:torch1.13.0-cuda11.6 +Download a docker image (CPU) +============================== + +Suppose that you select the tag ``cpu-py3.8-torch2.1.2-v1.1``, you can use +the following command to download it: + +.. code-block:: bash + + sudo docker pull ghcr.io/k2-fsa/icefall:cpu-py3.8-torch2.1.2-v1.1 + Run a docker image with GPU =========================== @@ -65,7 +96,7 @@ Run a docker image with CPU .. code-block:: bash - sudo docker run --rm -it k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash + sudo docker run --rm -it ghcr.io/k2-fsa/icefall:cpu-py3.8-torch2.1.2-v1.1 /bin/bash Run yesno within a docker container =================================== @@ -74,8 +105,13 @@ After starting the container, the following interface is presented: .. code-block:: bash + # GPU-enabled docker root@60c947eac59c:/workspace/icefall# + # CPU-only docker + root@60c947eac59c:# mkdir /workspace; git clone https://github.com/k2-fsa/icefall + root@60c947eac59c:# export PYTHONPATH=/workspace/icefall:$PYTHONPATH + It shows the current user is ``root`` and the current working directory is ``/workspace/icefall``. @@ -107,7 +143,7 @@ to switch to the ``yesno`` recipe and run .. hint:: - If you are running without GPU, it may report the following error: + If you are running without GPU with a GPU-enabled docker, it may report the following error: .. 
code-block:: bash From db52fe2349df0e07e931accb0cf1e63fec389fb7 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 26 Dec 2023 20:29:43 +0800 Subject: [PATCH 07/15] Refactor CI test for aishell (#1435) --- .github/scripts/aishell/ASR/run.sh | 274 ++++++++++++++++++ .github/scripts/docker/Dockerfile | 1 + .../scripts/docker/generate_build_matrix.py | 2 +- ...pruned-transducer-stateless3-2022-06-20.sh | 87 ------ .../run-aishell-zipformer-2023-10-24.sh | 103 ------- ...transducer-stateless-modified-2-aishell.sh | 48 --- ...d-transducer-stateless-modified-aishell.sh | 48 --- .github/workflows/aishell.yml | 81 ++++++ .github/workflows/run-aishell-2022-06-20.yml | 123 -------- .../run-aishell-zipformer-2023-10-24.yml | 95 ------ ...ransducer-stateless-modified-2-aishell.yml | 80 ----- ...-transducer-stateless-modified-aishell.yml | 80 ----- .../{run-yesno-recipe.yml => yesno.yml} | 23 +- 13 files changed, 360 insertions(+), 685 deletions(-) create mode 100755 .github/scripts/aishell/ASR/run.sh delete mode 100755 .github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh delete mode 100755 .github/scripts/run-aishell-zipformer-2023-10-24.sh delete mode 100755 .github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh delete mode 100755 .github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh create mode 100644 .github/workflows/aishell.yml delete mode 100644 .github/workflows/run-aishell-2022-06-20.yml delete mode 100644 .github/workflows/run-aishell-zipformer-2023-10-24.yml delete mode 100644 .github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml delete mode 100644 .github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml rename .github/workflows/{run-yesno-recipe.yml => yesno.yml} (69%) diff --git a/.github/scripts/aishell/ASR/run.sh b/.github/scripts/aishell/ASR/run.sh new file mode 100755 index 000000000..4d912fa76 --- /dev/null +++ b/.github/scripts/aishell/ASR/run.sh @@ -0,0 
+1,274 @@ +#!/usr/bin/env bash + +set -ex + +log() { + # This function is from espnet + local fname=${BASH_SOURCE[1]##*/} + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" +} + +cd egs/aishell/ASR + +function download_test_dev_manifests() { + git lfs install + + fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests + log "Downloading pre-commputed fbank from $fbank_url" + + git clone https://huggingface.co/csukuangfj/aishell-test-dev-manifests + ln -s $PWD/aishell-test-dev-manifests/data . +} + +function test_transducer_stateless3_2022_06_20() { + repo_url=https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20 + log "Downloading pre-trained model from $repo_url" + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt pretrained.pt + popd + + log "test greedy_search with pretrained.py" + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless3/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_char \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + + log "test beam search with pretrained.py" + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless3/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_char \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + + echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" + echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" + if [[ x"${GITHUB_EVENT_NAME}" == 
x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then + mkdir -p pruned_transducer_stateless3/exp + ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt + ln -s $PWD/$repo/data/lang_char data/ + + ls -lh data + ls -lh pruned_transducer_stateless3/exp + + log "Decoding test and dev" + + # use a small value for decoding with CPU + max_duration=100 + + for method in greedy_search fast_beam_search modified_beam_search; do + log "Decoding with $method" + + ./pruned_transducer_stateless3/decode.py \ + --decoding-method $method \ + --epoch 999 \ + --avg 1 \ + --max-duration $max_duration \ + --exp-dir pruned_transducer_stateless3/exp + done + + rm pruned_transducer_stateless3/exp/*.pt + fi + + rm -rf $repo +} + +function test_zipformer_large_2023_10_24() { + log "CI testing large model" + repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-large-2023-10-24/ + log "Downloading pre-trained model from $repo_url" + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for method in modified_beam_search greedy_search fast_beam_search; do + log "$method" + + ./zipformer/pretrained.py \ + --method $method \ + --context-size 1 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_char/tokens.txt \ + --num-encoder-layers 2,2,4,5,4,2 \ + --feedforward-dim 512,768,1536,2048,1536,768 \ + --encoder-dim 192,256,512,768,512,256 \ + --encoder-unmasked-dim 192,192,256,320,256,192 \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + rm -rf $repo +} + +function test_zipformer_2023_10_24() { + repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-2023-10-24/ + log "Downloading pre-trained model from $repo_url" + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + + for method in 
modified_beam_search greedy_search fast_beam_search; do + log "$method" + + ./zipformer/pretrained.py \ + --method $method \ + --context-size 1 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_char/tokens.txt \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + rm -rf $repo +} + +function test_zipformer_small_2023_10_24() { + log "CI testing small model" + repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-small-2023-10-24/ + log "Downloading pre-trained model from $repo_url" + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + + for method in modified_beam_search greedy_search fast_beam_search; do + log "$method" + + ./zipformer/pretrained.py \ + --method $method \ + --context-size 1 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_char/tokens.txt \ + --num-encoder-layers 2,2,2,2,2,2 \ + --feedforward-dim 512,768,768,768,768,768 \ + --encoder-dim 192,256,256,256,256,256 \ + --encoder-unmasked-dim 192,192,192,192,192,192 \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + rm -rf $repo +} + +function test_transducer_stateless_modified_2022_03_01() { + repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./transducer_stateless_modified/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_char \ + $repo/test_wavs/BAC009S0764W0121.wav \ + 
$repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + + for method in modified_beam_search beam_search; do + log "$method" + + ./transducer_stateless_modified/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_char \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + rm -rf $repo +} + +function test_transducer_stateless_modified_2_2022_03_01() { + repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./transducer_stateless_modified-2/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_char \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + + for method in modified_beam_search beam_search; do + log "$method" + + ./transducer_stateless_modified-2/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_char \ + $repo/test_wavs/BAC009S0764W0121.wav \ + $repo/test_wavs/BAC009S0764W0122.wav \ + $repo/test_wavs/BAC009S0764W0123.wav + done + rm -rf $repo +} + +download_test_dev_manifests +test_transducer_stateless3_2022_06_20 +test_zipformer_large_2023_10_24 +test_zipformer_2023_10_24 +test_zipformer_small_2023_10_24 +test_transducer_stateless_modified_2022_03_01 +test_transducer_stateless_modified_2_2022_03_01 + +ls -lh diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index f75d74854..f6a088af1 
100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update -y && \ git-lfs \ graphviz \ less \ + tree \ vim \ && \ apt-get clean && \ diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index 4e494d810..bdde97647 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -45,7 +45,7 @@ def get_torchaudio_version(torch_version): def get_matrix(): k2_version = "1.24.4.dev20231220" kaldifeat_version = "1.25.3.dev20231221" - version = "1.1" + version = "1.2" python_version = ["3.8", "3.9", "3.10", "3.11"] torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] diff --git a/.github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh b/.github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh deleted file mode 100755 index c3640cfde..000000000 --- a/.github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/aishell/ASR - -git lfs install - -fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests -log "Downloading pre-commputed fbank from $fbank_url" - -git clone https://huggingface.co/csukuangfj/aishell-test-dev-manifests -ln -s $PWD/aishell-test-dev-manifests/data . 
- -repo_url=https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20 -log "Downloading pre-trained model from $repo_url" -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt pretrained.pt -popd - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless3/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_char \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless3/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_char \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless3/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_char data/ - - ls -lh data - ls -lh pruned_transducer_stateless3/exp - - log "Decoding test and dev" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless3/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless3/exp - done - - rm 
pruned_transducer_stateless3/exp/*.pt -fi diff --git a/.github/scripts/run-aishell-zipformer-2023-10-24.sh b/.github/scripts/run-aishell-zipformer-2023-10-24.sh deleted file mode 100755 index 865e29799..000000000 --- a/.github/scripts/run-aishell-zipformer-2023-10-24.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/aishell/ASR - -git lfs install - -fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests -log "Downloading pre-commputed fbank from $fbank_url" - -git clone https://huggingface.co/csukuangfj/aishell-test-dev-manifests -ln -s $PWD/aishell-test-dev-manifests/data . - -log "=======================" -log "CI testing large model" -repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-large-2023-10-24/ -log "Downloading pre-trained model from $repo_url" -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for method in modified_beam_search greedy_search fast_beam_search; do - log "$method" - - ./zipformer/pretrained.py \ - --method $method \ - --context-size 1 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_char/tokens.txt \ - --num-encoder-layers 2,2,4,5,4,2 \ - --feedforward-dim 512,768,1536,2048,1536,768 \ - --encoder-dim 192,256,512,768,512,256 \ - --encoder-unmasked-dim 192,192,256,320,256,192 \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - -log "=======================" -log "CI testing medium model" -repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-2023-10-24/ -log "Downloading pre-trained model from $repo_url" -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - - -for 
method in modified_beam_search greedy_search fast_beam_search; do - log "$method" - - ./zipformer/pretrained.py \ - --method $method \ - --context-size 1 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_char/tokens.txt \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - - -log "=======================" -log "CI testing small model" -repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-small-2023-10-24/ -log "Downloading pre-trained model from $repo_url" -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - - -for method in modified_beam_search greedy_search fast_beam_search; do - log "$method" - - ./zipformer/pretrained.py \ - --method $method \ - --context-size 1 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_char/tokens.txt \ - --num-encoder-layers 2,2,2,2,2,2 \ - --feedforward-dim 512,768,768,768,768,768 \ - --encoder-dim 192,256,256,256,256,256 \ - --encoder-unmasked-dim 192,192,192,192,192,192 \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - diff --git a/.github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh b/.github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh deleted file mode 100755 index 0644d9be0..000000000 --- a/.github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/aishell/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url 
-repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./transducer_stateless_modified-2/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_char \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - -for method in modified_beam_search beam_search; do - log "$method" - - ./transducer_stateless_modified-2/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_char \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done diff --git a/.github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh b/.github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh deleted file mode 100755 index 79fb64311..000000000 --- a/.github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/aishell/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./transducer_stateless_modified/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_char \ - $repo/test_wavs/BAC009S0764W0121.wav \ 
- $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done - -for method in modified_beam_search beam_search; do - log "$method" - - ./transducer_stateless_modified/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_char \ - $repo/test_wavs/BAC009S0764W0121.wav \ - $repo/test_wavs/BAC009S0764W0122.wav \ - $repo/test_wavs/BAC009S0764W0123.wav -done diff --git a/.github/workflows/aishell.yml b/.github/workflows/aishell.yml new file mode 100644 index 000000000..136e117bd --- /dev/null +++ b/.github/workflows/aishell.yml @@ -0,0 +1,81 @@ +name: aishell + +on: + push: + branches: + - master + + pull_request: + branches: + - master + + workflow_dispatch: + + schedule: + # minute (0-59) + # hour (0-23) + # day of the month (1-31) + # month (1-12) + # day of the week (0-6) + # nightly build at 15:50 UTC time every day + - cron: "50 15 * * *" + +concurrency: + group: aishell-${{ github.ref }} + cancel-in-progress: true + +jobs: + generate_build_matrix: + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'aishell' || github.event_name == 'schedule') + + # see https://github.com/pytorch/pytorch/pull/50633 + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Generating build matrix + id: set-matrix + run: | + # outputting for debugging purposes + python ./.github/scripts/docker/generate_build_matrix.py + MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) + echo "::set-output name=matrix::${MATRIX}" + aishell: + needs: generate_build_matrix + name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + 
matrix: + ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Free space + shell: bash + run: | + df -h + rm -rf /opt/hostedtoolcache + df -h + echo "pwd: $PWD" + echo "github.workspace ${{ github.workspace }}" + + - name: Run aishell tests + uses: addnab/docker-run-action@v3 + with: + image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }} + options: | + --volume ${{ github.workspace }}/:/icefall + shell: bash + run: | + export PYTHONPATH=/icefall:$PYTHONPATH + cd /icefall + git config --global --add safe.directory /icefall + + .github/scripts/aishell/ASR/run.sh diff --git a/.github/workflows/run-aishell-2022-06-20.yml b/.github/workflows/run-aishell-2022-06-20.yml deleted file mode 100644 index 53fcb2c03..000000000 --- a/.github/workflows/run-aishell-2022-06-20.yml +++ /dev/null @@ -1,123 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-aishell-2022-06-20 -# pruned RNN-T + reworked model with random combiner -# https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20 - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_aishell_2022_06_20-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_aishell_2022_06_20: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - 
.github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh - - - name: Display decoding results for aishell pruned_transducer_stateless3 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/aishell/ASR/ - tree ./pruned_transducer_stateless3/exp - - cd pruned_transducer_stateless3 - echo "results for pruned_transducer_stateless3" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2 - - - name: Upload decoding results for aishell pruned_transducer_stateless3 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: aishell-torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless3-2022-06-20 - path: egs/aishell/ASR/pruned_transducer_stateless3/exp/ diff --git a/.github/workflows/run-aishell-zipformer-2023-10-24.yml b/.github/workflows/run-aishell-zipformer-2023-10-24.yml deleted file mode 100644 index f2fb44a5f..000000000 --- a/.github/workflows/run-aishell-zipformer-2023-10-24.yml +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright 2023 Zengrui Jin (Xiaomi Corp.) 
- -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-aishell-zipformer-2023-10-24 - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_aishell_zipformer_2023_10_24-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_aishell_zipformer_2023_10_24: - if: github.event.label.name == 'ready' || github.event.label.name == 'zipformer' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version 
}}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-aishell-zipformer-2023-10-24.sh - - \ No newline at end of file diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml deleted file mode 100644 index ce6d6f92d..000000000 --- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-pre-trained-trandsucer-stateless-modified-2-aishell - -on: - push: - branches: - - master - pull_request: - types: [labeled] - -concurrency: - group: run_pre_trained_transducer_stateless_modified_2_aishell-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_transducer_stateless_modified_2_aishell: - if: github.event.label.name == 'ready' || github.event_name == 'push' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Inference with pre-trained model - shell: bash - run: | - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - .github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml deleted file mode 100644 index f0cebd94a..000000000 --- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2021 Fangjun Kuang 
(csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-pre-trained-trandsucer-stateless-modified-aishell - -on: - push: - branches: - - master - pull_request: - types: [labeled] - -concurrency: - group: run_pre_trained_transducer_stateless_modified_aishell-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_transducer_stateless_modified_aishell: - if: github.event.label.name == 'ready' || github.event_name == 'push' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Inference with pre-trained model - shell: bash - run: | - 
sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - .github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh diff --git a/.github/workflows/run-yesno-recipe.yml b/.github/workflows/yesno.yml similarity index 69% rename from .github/workflows/run-yesno-recipe.yml rename to .github/workflows/yesno.yml index a99811815..182300dfa 100644 --- a/.github/workflows/run-yesno-recipe.yml +++ b/.github/workflows/yesno.yml @@ -1,26 +1,9 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-yesno-recipe +name: yesno on: push: branches: - master - - refactor-ci pull_request: branches: @@ -29,7 +12,7 @@ on: workflow_dispatch: concurrency: - group: run-yesno-recipe-${{ github.ref }} + group: yesno-${{ github.ref }} cancel-in-progress: true jobs: @@ -50,7 +33,7 @@ jobs: python ./.github/scripts/docker/generate_build_matrix.py MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) echo "::set-output name=matrix::${MATRIX}" - run-yesno-recipe: + yesno: needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} runs-on: ubuntu-latest From 140e6381ad4699ce919705f59240fad58c0b1bb6 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 27 Dec 2023 13:21:14 +0800 Subject: [PATCH 08/15] Refactor CI tests for librispeech (#1436) --- .github/scripts/aishell/ASR/run.sh | 73 +- .github/scripts/librispeech/ASR/run.sh | 1624 ++++++++++++++++- ...n-librispeech-conformer-ctc3-2022-11-28.sh | 122 -- ...-pruned-transducer-stateless-2022-03-12.sh | 77 - ...pruned-transducer-stateless2-2022-04-29.sh | 86 - ...pruned-transducer-stateless3-2022-04-29.sh | 85 - ...pruned-transducer-stateless3-2022-05-13.sh | 123 -- ...pruned-transducer-stateless5-2022-05-13.sh | 100 - ...pruned-transducer-stateless7-2022-11-11.sh | 106 -- ...ed-transducer-stateless7-ctc-2022-12-01.sh | 150 -- ...transducer-stateless7-ctc-bs-2023-01-29.sh | 147 -- ...nsducer-stateless7-streaming-2022-12-29.sh | 148 -- ...pruned-transducer-stateless8-2022-11-14.sh | 115 -- ...pruned-transducer-stateless2-2022-06-26.sh | 101 - ...rispeech-streaming-zipformer-2023-05-18.sh | 116 -- ...speech-transducer-stateless2-2022-04-19.sh | 77 - .../run-librispeech-zipformer-2023-05-18.sh | 94 - ...un-librispeech-zipformer-ctc-2023-06-14.sh | 117 -- ...un-librispeech-zipformer-mmi-2022-12-08.sh | 102 -- .github/scripts/run-pre-trained-ctc.sh | 240 --- ...d-transducer-stateless-librispeech-100h.sh | 77 - 
...d-transducer-stateless-librispeech-960h.sh | 77 - .../run-pre-trained-transducer-stateless.sh | 77 - .github/scripts/run-pre-trained-transducer.sh | 33 - .github/workflows/aishell.yml | 11 +- ...{train-librispeech.yml => librispeech.yml} | 6 +- .../workflows/run-librispeech-2022-03-12.yml | 159 -- .../workflows/run-librispeech-2022-04-29.yml | 185 -- .../workflows/run-librispeech-2022-05-13.yml | 159 -- .../run-librispeech-2022-11-11-stateless7.yml | 159 -- .../run-librispeech-2022-11-14-stateless8.yml | 159 -- ...-librispeech-2022-12-01-stateless7-ctc.yml | 163 -- ...n-librispeech-2022-12-08-zipformer-mmi.yml | 167 -- ...speech-2022-12-29-stateless7-streaming.yml | 172 -- ...brispeech-2023-01-29-stateless7-ctc-bs.yml | 163 -- ...-librispeech-conformer-ctc3-2022-11-28.yml | 155 -- ...runed-transducer-stateless3-2022-05-13.yml | 157 -- ...aming-transducer-stateless2-2022-06-26.yml | 159 -- ...ispeech-streaming-zipformer-2023-05-18.yml | 174 -- ...peech-transducer-stateless2-2022-04-19.yml | 159 -- .../run-librispeech-zipformer-2023-05-18.yml | 159 -- ...n-librispeech-zipformer-ctc-2023-06-14.yml | 155 -- .github/workflows/run-pretrained-ctc.yml | 87 - ...-transducer-stateless-librispeech-100h.yml | 158 -- ...r-stateless-librispeech-multi-datasets.yml | 158 -- .../run-pretrained-transducer-stateless.yml | 158 -- .../workflows/run-pretrained-transducer.yml | 80 - 47 files changed, 1658 insertions(+), 5671 deletions(-) delete mode 100755 .github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh delete 
mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh delete mode 100755 .github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh delete mode 100755 .github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh delete mode 100755 .github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh delete mode 100755 .github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh delete mode 100755 .github/scripts/run-librispeech-zipformer-2023-05-18.sh delete mode 100755 .github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh delete mode 100755 .github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh delete mode 100755 .github/scripts/run-pre-trained-ctc.sh delete mode 100755 .github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh delete mode 100755 .github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh delete mode 100755 .github/scripts/run-pre-trained-transducer-stateless.sh delete mode 100755 .github/scripts/run-pre-trained-transducer.sh rename .github/workflows/{train-librispeech.yml => librispeech.yml} (95%) delete mode 100644 .github/workflows/run-librispeech-2022-03-12.yml delete mode 100644 .github/workflows/run-librispeech-2022-04-29.yml delete mode 100644 .github/workflows/run-librispeech-2022-05-13.yml delete mode 100644 .github/workflows/run-librispeech-2022-11-11-stateless7.yml delete mode 100644 .github/workflows/run-librispeech-2022-11-14-stateless8.yml delete mode 100644 .github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml delete mode 100644 .github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml delete mode 100644 
.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml delete mode 100644 .github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml delete mode 100644 .github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml delete mode 100644 .github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml delete mode 100644 .github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml delete mode 100644 .github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml delete mode 100644 .github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml delete mode 100644 .github/workflows/run-librispeech-zipformer-2023-05-18.yml delete mode 100644 .github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml delete mode 100644 .github/workflows/run-pretrained-ctc.yml delete mode 100644 .github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml delete mode 100644 .github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml delete mode 100644 .github/workflows/run-pretrained-transducer-stateless.yml delete mode 100644 .github/workflows/run-pretrained-transducer.yml diff --git a/.github/scripts/aishell/ASR/run.sh b/.github/scripts/aishell/ASR/run.sh index 4d912fa76..f150b6337 100755 --- a/.github/scripts/aishell/ASR/run.sh +++ b/.github/scripts/aishell/ASR/run.sh @@ -263,6 +263,76 @@ function test_transducer_stateless_modified_2_2022_03_01() { rm -rf $repo } +function test_conformer_ctc() { + repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + pushd $repo + + git lfs pull --include "exp/pretrained.pt" + git lfs pull --include "data/lang_char/H.fst" + git lfs pull --include "data/lang_char/HL.fst" + git lfs pull --include "data/lang_char/HLG.fst" + + popd + + log "Display test files" + tree $repo/ + ls -lh 
$repo/test_wavs/*.wav + + log "CTC decoding" + + log "Exporting model with torchscript" + + pushd $repo/exp + ln -s pretrained.pt epoch-99.pt + popd + + ./conformer_ctc/export.py \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_char/tokens.txt \ + --jit 1 + + ls -lh $repo/exp + + ls -lh $repo/data/lang_char + + log "Decoding with H on CPU with OpenFst" + + ./conformer_ctc/jit_pretrained_decode_with_H.py \ + --nn-model $repo/exp/cpu_jit.pt \ + --H $repo/data/lang_char/H.fst \ + --tokens $repo/data/lang_char/tokens.txt \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + log "Decoding with HL on CPU with OpenFst" + + ./conformer_ctc/jit_pretrained_decode_with_HL.py \ + --nn-model $repo/exp/cpu_jit.pt \ + --HL $repo/data/lang_char/HL.fst \ + --words $repo/data/lang_char/words.txt \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + log "Decoding with HLG on CPU with OpenFst" + + ./conformer_ctc/jit_pretrained_decode_with_HLG.py \ + --nn-model $repo/exp/cpu_jit.pt \ + --HLG $repo/data/lang_char/HLG.fst \ + --words $repo/data/lang_char/words.txt \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + rm -rf $repo +} + download_test_dev_manifests test_transducer_stateless3_2022_06_20 test_zipformer_large_2023_10_24 @@ -270,5 +340,4 @@ test_zipformer_2023_10_24 test_zipformer_small_2023_10_24 test_transducer_stateless_modified_2022_03_01 test_transducer_stateless_modified_2_2022_03_01 - -ls -lh +# test_conformer_ctc # fails for torch 1.13.x and torch 2.0.x diff --git a/.github/scripts/librispeech/ASR/run.sh b/.github/scripts/librispeech/ASR/run.sh index 641d59458..7e9bd8a47 100755 --- a/.github/scripts/librispeech/ASR/run.sh +++ b/.github/scripts/librispeech/ASR/run.sh @@ -10,52 +10,1594 @@ log() { cd egs/librispeech/ASR -# We don't download the LM file since it is so large that it will -# cause OOM error for CI later. 
-mkdir -p download/lm -pushd download/lm -wget -q http://www.openslr.org/resources/11/librispeech-vocab.txt -wget -q http://www.openslr.org/resources/11/librispeech-lexicon.txt -wget -q http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz -ls -lh -gunzip librispeech-lm-norm.txt.gz +function prepare_data() { + # We don't download the LM file since it is so large that it will + # cause OOM error for CI later. + mkdir -p download/lm + pushd download/lm + wget -q http://www.openslr.org/resources/11/librispeech-vocab.txt + wget -q http://www.openslr.org/resources/11/librispeech-lexicon.txt + wget -q http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz + ls -lh + gunzip librispeech-lm-norm.txt.gz -ls -lh -popd + ls -lh + popd -pushd download/ -wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2 -tar xf LibriSpeech.tar.bz2 -rm LibriSpeech.tar.bz2 + pushd download/ + wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2 + tar xf LibriSpeech.tar.bz2 + rm LibriSpeech.tar.bz2 -cd LibriSpeech -ln -s train-clean-100 train-clean-360 -ln -s train-other-500 train-other-500 -popd + cd LibriSpeech + ln -s train-clean-100 train-clean-360 + ln -s train-other-500 train-other-500 + popd -mkdir -p data/manifests + mkdir -p data/manifests -lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests -ls -lh data/manifests + lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests + ls -lh data/manifests -./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False -ls -lh data/fbank + ./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False + ls -lh data/fbank -./prepare.sh --stage 5 
--stop-stage 6 + ./prepare.sh --stage 5 --stop-stage 6 +} -./zipformer/train.py \ - --world-size 1 \ - --num-epochs 1 \ - --start-epoch 1 \ - --use-fp16 0 \ - --exp-dir zipformer/exp-small \ - --causal 0 \ - --num-encoder-layers 1,1,1,1,1,1 \ - --feedforward-dim 64,96,96,96,96,96 \ - --encoder-dim 32,64,64,64,64,64 \ - --encoder-unmasked-dim 32,32,32,32,32,32 \ - --base-lr 0.04 \ - --full-libri 0 \ - --enable-musan 0 \ - --max-duration 30 \ - --print-diagnostics 1 +function run_diagnostics() { + ./zipformer/train.py \ + --world-size 1 \ + --num-epochs 1 \ + --start-epoch 1 \ + --use-fp16 0 \ + --exp-dir zipformer/exp-small \ + --causal 0 \ + --num-encoder-layers 1,1,1,1,1,1 \ + --feedforward-dim 64,96,96,96,96,96 \ + --encoder-dim 32,64,64,64,64,64 \ + --encoder-unmasked-dim 32,32,32,32,32,32 \ + --base-lr 0.04 \ + --full-libri 0 \ + --enable-musan 0 \ + --max-duration 30 \ + --print-diagnostics 1 +} + +function test_pruned_transducer_stateless_2022_03_12() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in fast_beam_search modified_beam_search beam_search; do + log "$method" + + ./pruned_transducer_stateless/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + 
$repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless2_2022_04_29() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless2-2022-04-29 + + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + pushd $repo + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/pretrained-epoch-38-avg-10.pt" + popd + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-epoch-38-avg-10.pt pretrained.pt + popd + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless2/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless2/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless3_2022_04_29() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-04-29 + + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + pushd $repo + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/pretrained-epoch-25-avg-6.pt" + popd + + log "Display test files" + tree $repo/ + ls -lh 
$repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-epoch-25-avg-6.pt pretrained.pt + popd + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless3/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless3/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless5_2022_05_13() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-epoch-39-avg-7.pt pretrained.pt + popd + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless5/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --num-encoder-layers 18 \ + --dim-feedforward 2048 \ + --nhead 8 \ + --encoder-dim 512 \ + --decoder-dim 512 \ + --joiner-dim 512 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + 
./pruned_transducer_stateless5/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav \ + --num-encoder-layers 18 \ + --dim-feedforward 2048 \ + --nhead 8 \ + --encoder-dim 512 \ + --decoder-dim 512 \ + --joiner-dim 512 + done + rm -rf $repo +} + +function test_pruned_transducer_stateless7_2022_11_11() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11 + + log "Downloading pre-trained model from $repo_url" + git lfs install + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/cpu_jit.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./pruned_transducer_stateless7/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./pruned_transducer_stateless7/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless7/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + 
$repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless7/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless8_2022_11_14() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14 + + log "Downloading pre-trained model from $repo_url" + git lfs install + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/cpu_jit.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Decode with models exported by torch.jit.script()" + + ./pruned_transducer_stateless8/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "Export to torchscript model" + ./pruned_transducer_stateless8/export.py \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --use-averaged-model false \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./pruned_transducer_stateless8/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for sym in 1 2 3; do 
+ log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless8/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless8/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless7_ctc_2022_12_01() { + repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01 + + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/HLG.pt" + git lfs pull --include "data/lang_bpe_500/L.pt" + git lfs pull --include "data/lang_bpe_500/LG.pt" + git lfs pull --include "data/lang_bpe_500/Linv.pt" + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "data/lm/G_4_gram.pt" + git lfs pull --include "exp/cpu_jit.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./pruned_transducer_stateless7_ctc/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + 
./pruned_transducer_stateless7_ctc/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for m in ctc-decoding 1best; do + ./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \ + --model-filename $repo/exp/cpu_jit.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --G $repo/data/lm/G_4_gram.pt \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless7_ctc/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless7_ctc/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for m in ctc-decoding 1best; do + ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \ + --checkpoint $repo/exp/pretrained.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --G $repo/data/lm/G_4_gram.pt \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + 
$repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_zipformer_mmi_2022_12_08() { + repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08 + + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/3gram.pt" + git lfs pull --include "data/lang_bpe_500/4gram.pt" + git lfs pull --include "data/lang_bpe_500/L.pt" + git lfs pull --include "data/lang_bpe_500/LG.pt" + git lfs pull --include "data/lang_bpe_500/Linv.pt" + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/cpu_jit.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./zipformer_mmi/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./zipformer_mmi/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + --lang-dir $repo/data/lang_bpe_500 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do + log "$method" + + ./zipformer_mmi/pretrained.py \ + --method $method \ + --checkpoint $repo/exp/pretrained.pt \ + --lang-dir $repo/data/lang_bpe_500 \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless7_streaming_2022_12_29() { + 
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 + + log "Downloading pre-trained model from $repo_url" + git lfs install + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/cpu_jit.pt" + git lfs pull --include "exp/pretrained.pt" + git lfs pull --include "exp/encoder_jit_trace.pt" + git lfs pull --include "exp/decoder_jit_trace.pt" + git lfs pull --include "exp/joiner_jit_trace.pt" + cd exp + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./pruned_transducer_stateless7_streaming/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --decode-chunk-len 32 \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./pruned_transducer_stateless7_streaming/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + --decode-chunk-len 32 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "Export to torchscript model by torch.jit.trace()" + ./pruned_transducer_stateless7_streaming/jit_trace_export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --decode-chunk-len 32 \ + --epoch 99 \ + --avg 1 + + log "Decode with models exported by torch.jit.trace()" + + ./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --encoder-model-filename $repo/exp/encoder_jit_trace.pt \ + --decoder-model-filename $repo/exp/decoder_jit_trace.pt \ + --joiner-model-filename $repo/exp/joiner_jit_trace.pt 
\ + --decode-chunk-len 32 \ + $repo/test_wavs/1089-134686-0001.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless7_streaming/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --decode-chunk-len 32 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless7_streaming/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --decode-chunk-len 32 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + rm -rf $repo +} + +function test_pruned_transducer_stateless7_ctc_bs_2023_01_29() { + repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29 + + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/L.pt" + git lfs pull --include "data/lang_bpe_500/LG.pt" + git lfs pull --include "data/lang_bpe_500/HLG.pt" + git lfs pull --include "data/lang_bpe_500/Linv.pt" + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "exp/cpu_jit.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./pruned_transducer_stateless7_ctc_bs/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls 
-lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --nn-model-filename $repo/exp/cpu_jit.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for m in ctc-decoding 1best; do + ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \ + --model-filename $repo/exp/cpu_jit.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless7_ctc_bs/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless7_ctc_bs/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for m in ctc-decoding 1best; do + ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \ + --checkpoint $repo/exp/pretrained.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + 
$repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_conformer_ctc3_2022_11_27() { + repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27 + + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/HLG.pt" + git lfs pull --include "data/lang_bpe_500/L.pt" + git lfs pull --include "data/lang_bpe_500/LG.pt" + git lfs pull --include "data/lang_bpe_500/Linv.pt" + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "data/lm/G_4_gram.pt" + git lfs pull --include "exp/jit_trace.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Decode with models exported by torch.jit.trace()" + + for m in ctc-decoding 1best; do + ./conformer_ctc3/jit_pretrained.py \ + --model-filename $repo/exp/jit_trace.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --G $repo/data/lm/G_4_gram.pt \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + log "Export to torchscript model" + + ./conformer_ctc3/export.py \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --jit-trace 1 \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model 0 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.trace()" + + for m in ctc-decoding 1best; do + ./conformer_ctc3/jit_pretrained.py \ + --model-filename $repo/exp/jit_trace.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + 
--G $repo/data/lm/G_4_gram.pt \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for m in ctc-decoding 1best; do + ./conformer_ctc3/pretrained.py \ + --checkpoint $repo/exp/pretrained.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --G $repo/data/lm/G_4_gram.pt \ + --method $m \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_lstm_transducer_stateless2_2022_09_03() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + abs_repo=$(realpath $repo) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-iter-468000-avg-16.pt pretrained.pt + ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt + popd + + log "Test exporting with torch.jit.trace()" + + ./lstm_transducer_stateless2/export.py \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --use-averaged-model 0 \ + --jit-trace 1 + + log "Decode with models exported by torch.jit.trace()" + + ./lstm_transducer_stateless2/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --encoder-model-filename $repo/exp/encoder_jit_trace.pt \ + --decoder-model-filename $repo/exp/decoder_jit_trace.pt \ + --joiner-model-filename $repo/exp/joiner_jit_trace.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + 
./lstm_transducer_stateless2/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./lstm_transducer_stateless2/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_pruned_transducer_stateless3_2022_05_13() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt + ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt + popd + + + log "Export to torchscript model" + ./pruned_transducer_stateless3/export.py \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ./pruned_transducer_stateless3/export.py \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit-trace 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.trace()" + + ./pruned_transducer_stateless3/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --encoder-model-filename $repo/exp/encoder_jit_trace.pt \ + --decoder-model-filename $repo/exp/decoder_jit_trace.pt \ + --joiner-model-filename $repo/exp/joiner_jit_trace.pt \ + $repo/test_wavs/1089-134686-0001.wav \ 
+ $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "Decode with models exported by torch.jit.script()" + + ./pruned_transducer_stateless3/jit_pretrained.py \ + --bpe-model $repo/data/lang_bpe_500/bpe.model \ + --encoder-model-filename $repo/exp/encoder_jit_script.pt \ + --decoder-model-filename $repo/exp/decoder_jit_script.pt \ + --joiner-model-filename $repo/exp/joiner_jit_script.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless3/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless3/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + rm -rf $repo +} + +function test_streaming_pruned_transducer_stateless2_20220625() { + repo_url=https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless2_20220625 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + ln -s pretrained-epoch-24-avg-10.pt pretrained.pt + popd + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./pruned_transducer_stateless2/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame 
$sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --simulate-streaming 1 \ + --causal-convolution 1 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./pruned_transducer_stateless2/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --simulate-streaming 1 \ + --causal-convolution 1 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_streaming_zipformer_2023_05_17() { + repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17 + + log "Downloading pre-trained model from $repo_url" + git lfs install + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "data/lang_bpe_500/tokens.txt" + git lfs pull --include "exp/jit_script_chunk_16_left_128.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./zipformer/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --causal 1 \ + --chunk-size 16 \ + --left-context-frames 128 \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./zipformer/jit_pretrained_streaming.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \ + $repo/test_wavs/1089-134686-0001.wav + + for method in greedy_search 
modified_beam_search fast_beam_search; do + log "$method" + + ./zipformer/pretrained.py \ + --causal 1 \ + --chunk-size 16 \ + --left-context-frames 128 \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_zipformer_2023_05_18() { + repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 + + log "Downloading pre-trained model from $repo_url" + git lfs install + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "data/lang_bpe_500/tokens.txt" + git lfs pull --include "exp/jit_script.pt" + git lfs pull --include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./zipformer/export.py \ + --exp-dir $repo/exp \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + ./zipformer/jit_pretrained.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --nn-model-filename $repo/exp/jit_script.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + for method in greedy_search modified_beam_search fast_beam_search; do + log "$method" + + ./zipformer/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + 
+function test_transducer_stateless2_torchaudio_2022_04_19() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./transducer_stateless2/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in fast_beam_search modified_beam_search beam_search; do + log "$method" + + ./transducer_stateless2/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_zipformer_transducer_ctc_2023_06_13() { + repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13 + + log "Downloading pre-trained model from $repo_url" + git lfs install + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + pushd $repo/exp + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "data/lang_bpe_500/tokens.txt" + git lfs pull --include "data/lang_bpe_500/HLG.pt" + git lfs pull --include "data/lang_bpe_500/L.pt" + git lfs pull --include "data/lang_bpe_500/LG.pt" + git lfs pull --include "data/lang_bpe_500/Linv.pt" + git lfs pull --include "data/lm/G_4_gram.pt" + git lfs pull --include "exp/jit_script.pt" + git lfs pull 
--include "exp/pretrained.pt" + ln -s pretrained.pt epoch-99.pt + ls -lh *.pt + popd + + log "Export to torchscript model" + ./zipformer/export.py \ + --exp-dir $repo/exp \ + --use-transducer 1 \ + --use-ctc 1 \ + --use-averaged-model false \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --epoch 99 \ + --avg 1 \ + --jit 1 + + ls -lh $repo/exp/*.pt + + log "Decode with models exported by torch.jit.script()" + + for method in ctc-decoding 1best; do + ./zipformer/jit_pretrained_ctc.py \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --model-filename $repo/exp/jit_script.pt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --G $repo/data/lm/G_4_gram.pt \ + --method $method \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in ctc-decoding 1best; do + log "$method" + + ./zipformer/pretrained_ctc.py \ + --use-transducer 1 \ + --use-ctc 1 \ + --method $method \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + --G $repo/data/lm/G_4_gram.pt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --sample-rate 16000 \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./transducer_stateless_multi_datasets/pretrained.py \ + --method greedy_search \ + 
--max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./transducer_stateless_multi_datasets/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_transducer_stateless_multi_datasets_bpe_500_2022_03_01() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./transducer_stateless_multi_datasets/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in modified_beam_search beam_search fast_beam_search; do + log "$method" + + ./transducer_stateless_multi_datasets/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_transducer_stateless_bpe_500_2022_02_07() { + 
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + for sym in 1 2 3; do + log "Greedy search with --max-sym-per-frame $sym" + + ./transducer_stateless/pretrained.py \ + --method greedy_search \ + --max-sym-per-frame $sym \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + + for method in fast_beam_search modified_beam_search beam_search; do + log "$method" + + ./transducer_stateless/pretrained.py \ + --method $method \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + done + rm -rf $repo +} + +function test_zipformer_ctc_en_2023_10_02() { + repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02 + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + log "CTC greedy search" + + ./zipformer/onnx_pretrained_ctc.py \ + --nn-model $repo/model.onnx \ + --tokens $repo/tokens.txt \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + log "CTC H decoding" + + ./zipformer/onnx_pretrained_ctc_H.py \ + --nn-model $repo/model.onnx \ + --tokens $repo/tokens.txt \ + --H $repo/H.fst \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + log "CTC HL decoding" + + ./zipformer/onnx_pretrained_ctc_HL.py \ + --nn-model $repo/model.onnx \ + --words $repo/words.txt \ + 
--HL $repo/HL.fst \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + log "CTC HLG decoding" + + ./zipformer/onnx_pretrained_ctc_HLG.py \ + --nn-model $repo/model.onnx \ + --words $repo/words.txt \ + --HLG $repo/HLG.fst \ + $repo/test_wavs/0.wav \ + $repo/test_wavs/1.wav \ + $repo/test_wavs/2.wav + + rm -rf $repo +} + +function test_conformer_ctc_jit_bpe_500_2021_11_09() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09 + log "Downloading pre-trained model from $repo_url" + GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url + repo=$(basename $repo_url) + pushd $repo + + git lfs pull --include "exp/pretrained.pt" + git lfs pull --include "data/lang_bpe_500/HLG.pt" + git lfs pull --include "data/lang_bpe_500/L.pt" + git lfs pull --include "data/lang_bpe_500/L_disambig.pt" + git lfs pull --include "data/lang_bpe_500/Linv.pt" + git lfs pull --include "data/lang_bpe_500/bpe.model" + git lfs pull --include "data/lang_bpe_500/lexicon.txt" + git lfs pull --include "data/lang_bpe_500/lexicon_disambig.txt" + git lfs pull --include "data/lang_bpe_500/tokens.txt" + git lfs pull --include "data/lang_bpe_500/words.txt" + git lfs pull --include "data/lm/G_3_gram.fst.txt" + + popd + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + log "CTC decoding" + + ./conformer_ctc/pretrained.py \ + --method ctc-decoding \ + --num-classes 500 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "HLG decoding" + + ./conformer_ctc/pretrained.py \ + --method 1best \ + --num-classes 500 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --words-file $repo/data/lang_bpe_500/words.txt \ + --HLG $repo/data/lang_bpe_500/HLG.pt \ + $repo/test_wavs/1089-134686-0001.wav \ + 
$repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "CTC decoding on CPU with kaldi decoders using OpenFst" + + log "Exporting model with torchscript" + + pushd $repo/exp + ln -s pretrained.pt epoch-99.pt + popd + + ./conformer_ctc/export.py \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + --jit 1 + + ls -lh $repo/exp + + + log "Generating H.fst, HL.fst" + + ./local/prepare_lang_fst.py --lang-dir $repo/data/lang_bpe_500 --ngram-G $repo/data/lm/G_3_gram.fst.txt + + ls -lh $repo/data/lang_bpe_500 + + log "Decoding with H on CPU with OpenFst" + + ./conformer_ctc/jit_pretrained_decode_with_H.py \ + --nn-model $repo/exp/cpu_jit.pt \ + --H $repo/data/lang_bpe_500/H.fst \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "Decoding with HL on CPU with OpenFst" + + ./conformer_ctc/jit_pretrained_decode_with_HL.py \ + --nn-model $repo/exp/cpu_jit.pt \ + --HL $repo/data/lang_bpe_500/HL.fst \ + --words $repo/data/lang_bpe_500/words.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + log "Decoding with HLG on CPU with OpenFst" + + ./conformer_ctc/jit_pretrained_decode_with_HLG.py \ + --nn-model $repo/exp/cpu_jit.pt \ + --HLG $repo/data/lang_bpe_500/HLG.fst \ + --words $repo/data/lang_bpe_500/words.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + rm -rf $repo +} + +function test_transducer_bpe_500_2021_12_23() { + repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-bpe-500-2021-12-23 + + log "Downloading pre-trained model from $repo_url" + git lfs install + git clone $repo_url + repo=$(basename $repo_url) + + log "Display test files" + tree $repo/ + ls -lh $repo/test_wavs/*.wav + + 
log "Beam search decoding" + + ./transducer/pretrained.py \ + --method beam_search \ + --beam-size 4 \ + --checkpoint $repo/exp/pretrained.pt \ + --tokens $repo/data/lang_bpe_500/tokens.txt \ + $repo/test_wavs/1089-134686-0001.wav \ + $repo/test_wavs/1221-135766-0001.wav \ + $repo/test_wavs/1221-135766-0002.wav + + rm -rf $repo +} + +prepare_data +run_diagnostics +test_pruned_transducer_stateless_2022_03_12 +test_pruned_transducer_stateless2_2022_04_29 +test_pruned_transducer_stateless3_2022_04_29 +test_pruned_transducer_stateless5_2022_05_13 +test_pruned_transducer_stateless7_2022_11_11 +test_pruned_transducer_stateless8_2022_11_14 +test_pruned_transducer_stateless7_ctc_2022_12_01 +test_zipformer_mmi_2022_12_08 +test_pruned_transducer_stateless7_streaming_2022_12_29 +test_pruned_transducer_stateless7_ctc_bs_2023_01_29 +test_conformer_ctc3_2022_11_27 +test_lstm_transducer_stateless2_2022_09_03 +test_pruned_transducer_stateless3_2022_05_13 +test_streaming_pruned_transducer_stateless2_20220625 +test_streaming_zipformer_2023_05_17 +test_zipformer_2023_05_18 +test_transducer_stateless2_torchaudio_2022_04_19 +test_zipformer_transducer_ctc_2023_06_13 +test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21 +test_transducer_stateless_multi_datasets_bpe_500_2022_03_01 +test_transducer_stateless_bpe_500_2022_02_07 +test_zipformer_ctc_en_2023_10_02 +# test_conformer_ctc_jit_bpe_500_2021_11_09 # fails for torch != 1.13.x and torch != 2.0.x +test_transducer_bpe_500_2021_12_23 diff --git a/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh b/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh deleted file mode 100755 index f6fe8c9b2..000000000 --- a/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd 
egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27 - -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/HLG.pt" -git lfs pull --include "data/lang_bpe_500/L.pt" -git lfs pull --include "data/lang_bpe_500/LG.pt" -git lfs pull --include "data/lang_bpe_500/Linv.pt" -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "data/lm/G_4_gram.pt" -git lfs pull --include "exp/jit_trace.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Decode with models exported by torch.jit.trace()" - -for m in ctc-decoding 1best; do - ./conformer_ctc3/jit_pretrained.py \ - --model-filename $repo/exp/jit_trace.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --G $repo/data/lm/G_4_gram.pt \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -log "Export to torchscript model" - -./conformer_ctc3/export.py \ - --exp-dir $repo/exp \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --jit-trace 1 \ - --epoch 99 \ - --avg 1 \ - --use-averaged-model 0 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.trace()" - -for m in ctc-decoding 1best; do - ./conformer_ctc3/jit_pretrained.py \ - --model-filename $repo/exp/jit_trace.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --G $repo/data/lm/G_4_gram.pt \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - 
$repo/test_wavs/1221-135766-0002.wav -done - -for m in ctc-decoding 1best; do - ./conformer_ctc3/pretrained.py \ - --checkpoint $repo/exp/pretrained.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --G $repo/data/lm/G_4_gram.pt \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p conformer_ctc3/exp - ln -s $PWD/$repo/exp/pretrained.pt conformer_ctc3/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh conformer_ctc3/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in ctc-decoding 1best; do - log "Decoding with $method" - ./conformer_ctc3/decode.py \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --exp-dir conformer_ctc3/exp/ \ - --max-duration $max_duration \ - --decoding-method $method \ - --lm-dir data/lm - done - - rm conformer_ctc3/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh deleted file mode 100755 index 412e3ad56..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12 - -log 
"Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in fast_beam_search modified_beam_search beam_search; do - log "$method" - - ./pruned_transducer_stateless/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless/exp - done - - rm pruned_transducer_stateless/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh 
b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh deleted file mode 100755 index 243b669ed..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless2-2022-04-29 - -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/pretrained-epoch-38-avg-10.pt" -popd - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -ln -s pretrained-epoch-38-avg-10.pt pretrained.pt -popd - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless2/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless2/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - 
mkdir -p pruned_transducer_stateless2/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless2/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless2/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless2/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless2/exp - done - - rm pruned_transducer_stateless2/exp/*.pt - rm -r data/lang_bpe_500 -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh deleted file mode 100755 index 2d0f80304..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-04-29 - -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) -pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/pretrained-epoch-25-avg-6.pt" -popd - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -ln -s pretrained-epoch-25-avg-6.pt pretrained.pt -popd - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless3/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ 
- --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless3/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless3/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless3/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless3/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless3/exp - done - - rm pruned_transducer_stateless3/exp/*.pt - rm -r data/lang_bpe_500 -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh deleted file mode 100755 index 3d5814c48..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') 
(${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt -ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt -popd - - -log "Export to torchscript model" -./pruned_transducer_stateless3/export.py \ - --exp-dir $repo/exp \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -./pruned_transducer_stateless3/export.py \ - --exp-dir $repo/exp \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit-trace 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.trace()" - -./pruned_transducer_stateless3/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --encoder-model-filename $repo/exp/encoder_jit_trace.pt \ - --decoder-model-filename $repo/exp/decoder_jit_trace.pt \ - --joiner-model-filename $repo/exp/joiner_jit_trace.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "Decode with models exported by torch.jit.script()" - -./pruned_transducer_stateless3/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --encoder-model-filename $repo/exp/encoder_jit_script.pt \ - --decoder-model-filename $repo/exp/decoder_jit_script.pt \ - --joiner-model-filename $repo/exp/joiner_jit_script.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless3/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ 
- --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless3/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless3/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless3/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless3/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless3/exp - done - - rm pruned_transducer_stateless3/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh deleted file mode 100755 index 3d2442d54..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') 
(${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -ln -s pretrained-epoch-39-avg-7.pt pretrained.pt -popd - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless5/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --num-encoder-layers 18 \ - --dim-feedforward 2048 \ - --nhead 8 \ - --encoder-dim 512 \ - --decoder-dim 512 \ - --joiner-dim 512 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless5/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav \ - --num-encoder-layers 18 \ - --dim-feedforward 2048 \ - --nhead 8 \ - --encoder-dim 512 \ - --decoder-dim 512 \ - --joiner-dim 512 -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless5/exp - ln -s $PWD/$repo/exp/pretrained-epoch-39-avg-7.pt pruned_transducer_stateless5/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless5/exp - - log "Decoding test-clean 
and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless5/decode.py \ - --decoding-method $method \ - --use-averaged-model 0 \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless5/exp \ - --num-encoder-layers 18 \ - --dim-feedforward 2048 \ - --nhead 8 \ - --encoder-dim 512 \ - --decoder-dim 512 \ - --joiner-dim 512 - done - - rm pruned_transducer_stateless5/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh deleted file mode 100755 index 961dde4f4..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11 - -log "Downloading pre-trained model from $repo_url" -git lfs install -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/cpu_jit.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./pruned_transducer_stateless7/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - 
-./pruned_transducer_stateless7/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless7/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless7/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless7/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless7/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless7/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless7/exp - done - - rm pruned_transducer_stateless7/exp/*.pt -fi diff --git 
a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh deleted file mode 100755 index ba7139efb..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01 - -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/HLG.pt" -git lfs pull --include "data/lang_bpe_500/L.pt" -git lfs pull --include "data/lang_bpe_500/LG.pt" -git lfs pull --include "data/lang_bpe_500/Linv.pt" -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "data/lm/G_4_gram.pt" -git lfs pull --include "exp/cpu_jit.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./pruned_transducer_stateless7_ctc/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./pruned_transducer_stateless7_ctc/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -for m in ctc-decoding 1best; do - 
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \ - --model-filename $repo/exp/cpu_jit.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --G $repo/data/lm/G_4_gram.pt \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless7_ctc/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless7_ctc/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for m in ctc-decoding 1best; do - ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \ - --checkpoint $repo/exp/pretrained.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --G $repo/data/lm/G_4_gram.pt \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless7_ctc/exp - ln -s 
$PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_ctc/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless7_ctc/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless7_ctc/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless7_ctc/exp - done - - for m in ctc-decoding 1best; do - ./pruned_transducer_stateless7_ctc/ctc_decode.py \ - --epoch 999 \ - --avg 1 \ - --exp-dir ./pruned_transducer_stateless7_ctc/exp \ - --max-duration $max_duration \ - --use-averaged-model 0 \ - --decoding-method $m \ - --hlg-scale 0.6 \ - --lm-dir data/lm - done - - rm pruned_transducer_stateless7_ctc/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh deleted file mode 100755 index 1ecbc4798..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29 - -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/L.pt" -git lfs pull --include "data/lang_bpe_500/LG.pt" -git lfs pull 
--include "data/lang_bpe_500/HLG.pt" -git lfs pull --include "data/lang_bpe_500/Linv.pt" -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/cpu_jit.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./pruned_transducer_stateless7_ctc_bs/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -for m in ctc-decoding 1best; do - ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \ - --model-filename $repo/exp/cpu_jit.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless7_ctc_bs/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless7_ctc_bs/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - 
$repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for m in ctc-decoding 1best; do - ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \ - --checkpoint $repo/exp/pretrained.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --method $m \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" - -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless7_ctc_bs/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_ctc_bs/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless7_ctc_bs/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless7_ctc_bs/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless7_ctc_bs/exp - done - - for m in ctc-decoding 1best; do - ./pruned_transducer_stateless7_ctc_bs/ctc_decode.py \ - --epoch 999 \ - --avg 1 \ - --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \ - --max-duration $max_duration \ - --use-averaged-model 0 \ - --decoding-method $m \ - --hlg-scale 0.6 - done - - rm pruned_transducer_stateless7_ctc_bs/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh 
b/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh deleted file mode 100755 index 37b192a57..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh +++ /dev/null @@ -1,148 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29 - -log "Downloading pre-trained model from $repo_url" -git lfs install -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/cpu_jit.pt" -git lfs pull --include "exp/pretrained.pt" -git lfs pull --include "exp/encoder_jit_trace.pt" -git lfs pull --include "exp/decoder_jit_trace.pt" -git lfs pull --include "exp/joiner_jit_trace.pt" -cd exp -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./pruned_transducer_stateless7_streaming/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --decode-chunk-len 32 \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./pruned_transducer_stateless7_streaming/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - --decode-chunk-len 32 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "Export to torchscript model by torch.jit.trace()" -./pruned_transducer_stateless7_streaming/jit_trace_export.py \ - --exp-dir $repo/exp \ - 
--use-averaged-model false \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --decode-chunk-len 32 \ - --epoch 99 \ - --avg 1 - -log "Decode with models exported by torch.jit.trace()" - -./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --encoder-model-filename $repo/exp/encoder_jit_trace.pt \ - --decoder-model-filename $repo/exp/decoder_jit_trace.pt \ - --joiner-model-filename $repo/exp/joiner_jit_trace.pt \ - --decode-chunk-len 32 \ - $repo/test_wavs/1089-134686-0001.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless7_streaming/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --decode-chunk-len 32 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless7_streaming/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --decode-chunk-len 32 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless7_streaming/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_streaming/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless7_streaming/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - 
num_decode_stream=200 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "decoding with $method" - - ./pruned_transducer_stateless7_streaming/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --decode-chunk-len 32 \ - --exp-dir pruned_transducer_stateless7_streaming/exp - done - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless7_streaming/streaming_decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --decode-chunk-len 32 \ - --num-decode-streams $num_decode_stream - --exp-dir pruned_transducer_stateless7_streaming/exp - done - - rm pruned_transducer_stateless7_streaming/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh deleted file mode 100755 index 4f2bfac24..000000000 --- a/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14 - -log "Downloading pre-trained model from $repo_url" -git lfs install -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/cpu_jit.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Decode with models exported by torch.jit.script()" - 
-./pruned_transducer_stateless8/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "Export to torchscript model" -./pruned_transducer_stateless8/export.py \ - --exp-dir $repo/exp \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --use-averaged-model false \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./pruned_transducer_stateless8/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless8/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless8/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless8/exp - ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless8/exp/epoch-999.pt - ln -s 
$PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless8/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./pruned_transducer_stateless8/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless8/exp - done - - rm pruned_transducer_stateless8/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh b/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh deleted file mode 100755 index 5cbdad16d..000000000 --- a/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless2_20220625 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -ln -s pretrained-epoch-24-avg-10.pt pretrained.pt -popd - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./pruned_transducer_stateless2/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --simulate-streaming 1 \ - --causal-convolution 1 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for 
method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./pruned_transducer_stateless2/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --simulate-streaming 1 \ - --causal-convolution 1 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p pruned_transducer_stateless2/exp - ln -s $PWD/$repo/exp/pretrained-epoch-24-avg-10.pt pruned_transducer_stateless2/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh pruned_transducer_stateless2/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Simulate streaming decoding with $method" - - ./pruned_transducer_stateless2/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir pruned_transducer_stateless2/exp \ - --simulate-streaming 1 \ - --causal-convolution 1 - done - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Real streaming decoding with $method" - - ./pruned_transducer_stateless2/streaming_decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --num-decode-streams 100 \ - --exp-dir pruned_transducer_stateless2/exp \ - --left-context 32 \ - --decode-chunk-size 8 \ - --right-context 0 - done - - rm pruned_transducer_stateless2/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh b/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh deleted file mode 100755 index 
f4e2124b1..000000000 --- a/.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17 - -log "Downloading pre-trained model from $repo_url" -git lfs install -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "data/lang_bpe_500/tokens.txt" -git lfs pull --include "exp/jit_script_chunk_16_left_128.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./zipformer/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --causal 1 \ - --chunk-size 16 \ - --left-context-frames 128 \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./zipformer/jit_pretrained_streaming.py \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \ - $repo/test_wavs/1089-134686-0001.wav - -for method in greedy_search modified_beam_search fast_beam_search; do - log "$method" - - ./zipformer/pretrained.py \ - --causal 1 \ - --chunk-size 16 \ - --left-context-frames 128 \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo 
"GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p zipformer/exp - ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh zipformer/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Simulated streaming decoding with $method" - - ./zipformer/decode.py \ - --causal 1 \ - --chunk-size 16 \ - --left-context-frames 128 \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir zipformer/exp - done - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Chunk-wise streaming decoding with $method" - - ./zipformer/streaming_decode.py \ - --causal 1 \ - --chunk-size 16 \ - --left-context-frames 128 \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir zipformer/exp - done - - rm zipformer/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh deleted file mode 100755 index ff77855a2..000000000 --- a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log 
"Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./transducer_stateless2/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in fast_beam_search modified_beam_search beam_search; do - log "$method" - - ./transducer_stateless2/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p transducer_stateless2/exp - ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless2/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh transducer_stateless2/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./transducer_stateless2/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir transducer_stateless2/exp - done - - rm transducer_stateless2/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-zipformer-2023-05-18.sh b/.github/scripts/run-librispeech-zipformer-2023-05-18.sh deleted file mode 100755 index fb1a0149d..000000000 --- a/.github/scripts/run-librispeech-zipformer-2023-05-18.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env bash - -set -e 
- -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 - -log "Downloading pre-trained model from $repo_url" -git lfs install -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "data/lang_bpe_500/tokens.txt" -git lfs pull --include "exp/jit_script.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./zipformer/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./zipformer/jit_pretrained.py \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --nn-model-filename $repo/exp/jit_script.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -for method in greedy_search modified_beam_search fast_beam_search; do - log "$method" - - ./zipformer/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p zipformer/exp - ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt - ln -s 
$PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh zipformer/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./zipformer/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir zipformer/exp - done - - rm zipformer/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh b/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh deleted file mode 100755 index 0026d2109..000000000 --- a/.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13 - -log "Downloading pre-trained model from $repo_url" -git lfs install -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "data/lang_bpe_500/tokens.txt" -git lfs pull --include "data/lang_bpe_500/HLG.pt" -git lfs pull --include "data/lang_bpe_500/L.pt" -git lfs pull --include "data/lang_bpe_500/LG.pt" -git lfs pull --include "data/lang_bpe_500/Linv.pt" -git lfs pull --include "data/lm/G_4_gram.pt" -git lfs pull --include "exp/jit_script.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./zipformer/export.py \ - --exp-dir $repo/exp \ - --use-transducer 1 \ - --use-ctc 1 \ - --use-averaged-model false \ - 
--tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -for method in ctc-decoding 1best; do - ./zipformer/jit_pretrained_ctc.py \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --model-filename $repo/exp/jit_script.pt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --G $repo/data/lm/G_4_gram.pt \ - --method $method \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in ctc-decoding 1best; do - log "$method" - - ./zipformer/pretrained_ctc.py \ - --use-transducer 1 \ - --use-ctc 1 \ - --method $method \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - --G $repo/data/lm/G_4_gram.pt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --sample-rate 16000 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p zipformer/exp - ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh zipformer/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in ctc-decoding 1best; do - log "Decoding with $method" - - ./zipformer/ctc_decode.py \ - --use-transducer 1 \ - --use-ctc 1 \ - --decoding-method $method \ - --nbest-scale 1.0 \ - --hlg-scale 0.6 \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --max-duration $max_duration \ - --exp-dir zipformer/exp - done - - rm 
zipformer/exp/*.pt -fi diff --git a/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh b/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh deleted file mode 100755 index c59921055..000000000 --- a/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08 - -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -pushd $repo/exp -git lfs pull --include "data/lang_bpe_500/3gram.pt" -git lfs pull --include "data/lang_bpe_500/4gram.pt" -git lfs pull --include "data/lang_bpe_500/L.pt" -git lfs pull --include "data/lang_bpe_500/LG.pt" -git lfs pull --include "data/lang_bpe_500/Linv.pt" -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "exp/cpu_jit.pt" -git lfs pull --include "exp/pretrained.pt" -ln -s pretrained.pt epoch-99.pt -ls -lh *.pt -popd - -log "Export to torchscript model" -./zipformer_mmi/export.py \ - --exp-dir $repo/exp \ - --use-averaged-model false \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --epoch 99 \ - --avg 1 \ - --jit 1 - -ls -lh $repo/exp/*.pt - -log "Decode with models exported by torch.jit.script()" - -./zipformer_mmi/jit_pretrained.py \ - --bpe-model $repo/data/lang_bpe_500/bpe.model \ - --nn-model-filename $repo/exp/cpu_jit.pt \ - --lang-dir $repo/data/lang_bpe_500 \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do - log "$method" - - 
./zipformer_mmi/pretrained.py \ - --method $method \ - --checkpoint $repo/exp/pretrained.pt \ - --lang-dir $repo/data/lang_bpe_500 \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p zipformer_mmi/exp - ln -s $PWD/$repo/exp/pretrained.pt zipformer_mmi/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh zipformer_mmi/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do - log "Decoding with $method" - - ./zipformer_mmi/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --use-averaged-model 0 \ - --nbest-scale 1.2 \ - --hp-scale 1.0 \ - --max-duration $max_duration \ - --lang-dir $repo/data/lang_bpe_500 \ - --exp-dir zipformer_mmi/exp - done - - rm zipformer_mmi/exp/*.pt -fi diff --git a/.github/scripts/run-pre-trained-ctc.sh b/.github/scripts/run-pre-trained-ctc.sh deleted file mode 100755 index 7d6449c9a..000000000 --- a/.github/scripts/run-pre-trained-ctc.sh +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -pushd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02 -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -log "CTC greedy 
search" - -./zipformer/onnx_pretrained_ctc.py \ - --nn-model $repo/model.onnx \ - --tokens $repo/tokens.txt \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -log "CTC H decoding" - -./zipformer/onnx_pretrained_ctc_H.py \ - --nn-model $repo/model.onnx \ - --tokens $repo/tokens.txt \ - --H $repo/H.fst \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -log "CTC HL decoding" - -./zipformer/onnx_pretrained_ctc_HL.py \ - --nn-model $repo/model.onnx \ - --words $repo/words.txt \ - --HL $repo/HL.fst \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -log "CTC HLG decoding" - -./zipformer/onnx_pretrained_ctc_HLG.py \ - --nn-model $repo/model.onnx \ - --words $repo/words.txt \ - --HLG $repo/HLG.fst \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -rm -rf $repo - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09 -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) -pushd $repo - -git lfs pull --include "exp/pretrained.pt" -git lfs pull --include "data/lang_bpe_500/HLG.pt" -git lfs pull --include "data/lang_bpe_500/L.pt" -git lfs pull --include "data/lang_bpe_500/L_disambig.pt" -git lfs pull --include "data/lang_bpe_500/Linv.pt" -git lfs pull --include "data/lang_bpe_500/bpe.model" -git lfs pull --include "data/lang_bpe_500/lexicon.txt" -git lfs pull --include "data/lang_bpe_500/lexicon_disambig.txt" -git lfs pull --include "data/lang_bpe_500/tokens.txt" -git lfs pull --include "data/lang_bpe_500/words.txt" -git lfs pull --include "data/lm/G_3_gram.fst.txt" - -popd - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -log "CTC decoding" - -./conformer_ctc/pretrained.py \ - --method ctc-decoding \ - --num-classes 500 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - 
$repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "HLG decoding" - -./conformer_ctc/pretrained.py \ - --method 1best \ - --num-classes 500 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --words-file $repo/data/lang_bpe_500/words.txt \ - --HLG $repo/data/lang_bpe_500/HLG.pt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "CTC decoding on CPU with kaldi decoders using OpenFst" - -log "Exporting model with torchscript" - -pushd $repo/exp -ln -s pretrained.pt epoch-99.pt -popd - -./conformer_ctc/export.py \ - --epoch 99 \ - --avg 1 \ - --exp-dir $repo/exp \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - --jit 1 - -ls -lh $repo/exp - - -log "Generating H.fst, HL.fst" - -./local/prepare_lang_fst.py --lang-dir $repo/data/lang_bpe_500 --ngram-G $repo/data/lm/G_3_gram.fst.txt - -ls -lh $repo/data/lang_bpe_500 - -log "Decoding with H on CPU with OpenFst" - -./conformer_ctc/jit_pretrained_decode_with_H.py \ - --nn-model $repo/exp/cpu_jit.pt \ - --H $repo/data/lang_bpe_500/H.fst \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "Decoding with HL on CPU with OpenFst" - -./conformer_ctc/jit_pretrained_decode_with_HL.py \ - --nn-model $repo/exp/cpu_jit.pt \ - --HL $repo/data/lang_bpe_500/HL.fst \ - --words $repo/data/lang_bpe_500/words.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -log "Decoding with HLG on CPU with OpenFst" - -./conformer_ctc/jit_pretrained_decode_with_HLG.py \ - --nn-model $repo/exp/cpu_jit.pt \ - --HLG $repo/data/lang_bpe_500/HLG.fst \ - --words $repo/data/lang_bpe_500/words.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - 
$repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav - -rm -rf $repo - -popd - -log "Test aishell" - -pushd egs/aishell/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc -log "Downloading pre-trained model from $repo_url" -GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url -repo=$(basename $repo_url) -pushd $repo - -git lfs pull --include "exp/pretrained.pt" -git lfs pull --include "data/lang_char/H.fst" -git lfs pull --include "data/lang_char/HL.fst" -git lfs pull --include "data/lang_char/HLG.fst" - -popd - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -log "CTC decoding" - -log "Exporting model with torchscript" - -pushd $repo/exp -ln -s pretrained.pt epoch-99.pt -popd - -./conformer_ctc/export.py \ - --epoch 99 \ - --avg 1 \ - --exp-dir $repo/exp \ - --tokens $repo/data/lang_char/tokens.txt \ - --jit 1 - -ls -lh $repo/exp - -ls -lh $repo/data/lang_char - -log "Decoding with H on CPU with OpenFst" - -./conformer_ctc/jit_pretrained_decode_with_H.py \ - --nn-model $repo/exp/cpu_jit.pt \ - --H $repo/data/lang_char/H.fst \ - --tokens $repo/data/lang_char/tokens.txt \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -log "Decoding with HL on CPU with OpenFst" - -./conformer_ctc/jit_pretrained_decode_with_HL.py \ - --nn-model $repo/exp/cpu_jit.pt \ - --HL $repo/data/lang_char/HL.fst \ - --words $repo/data/lang_char/words.txt \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -log "Decoding with HLG on CPU with OpenFst" - -./conformer_ctc/jit_pretrained_decode_with_HLG.py \ - --nn-model $repo/exp/cpu_jit.pt \ - --HLG $repo/data/lang_char/HLG.fst \ - --words $repo/data/lang_char/words.txt \ - $repo/test_wavs/0.wav \ - $repo/test_wavs/1.wav \ - $repo/test_wavs/2.wav - -rm -rf $repo diff --git a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh 
deleted file mode 100755 index 7b686328d..000000000 --- a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./transducer_stateless_multi_datasets/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./transducer_stateless_multi_datasets/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p transducer_stateless_multi_datasets/exp - ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless_multi_datasets/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh transducer_stateless_multi_datasets/exp - - log "Decoding 
test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./transducer_stateless_multi_datasets/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir transducer_stateless_multi_datasets/exp - done - - rm transducer_stateless_multi_datasets/exp/*.pt -fi diff --git a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh deleted file mode 100755 index a8eeeb514..000000000 --- a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./transducer_stateless_multi_datasets/pretrained.py \ - --method greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in modified_beam_search beam_search fast_beam_search; do - log "$method" - - ./transducer_stateless_multi_datasets/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt 
\ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p transducer_stateless_multi_datasets/exp - ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless_multi_datasets/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh transducer_stateless_multi_datasets/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./transducer_stateless_multi_datasets/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir transducer_stateless_multi_datasets/exp - done - - rm transducer_stateless_multi_datasets/exp/*.pt -fi diff --git a/.github/scripts/run-pre-trained-transducer-stateless.sh b/.github/scripts/run-pre-trained-transducer-stateless.sh deleted file mode 100755 index 2e2360435..000000000 --- a/.github/scripts/run-pre-trained-transducer-stateless.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - -repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -for sym in 1 2 3; do - log "Greedy search with --max-sym-per-frame $sym" - - ./transducer_stateless/pretrained.py \ - --method 
greedy_search \ - --max-sym-per-frame $sym \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -for method in fast_beam_search modified_beam_search beam_search; do - log "$method" - - ./transducer_stateless/pretrained.py \ - --method $method \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav -done - -echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}" -echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}" -if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then - mkdir -p transducer_stateless/exp - ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless/exp/epoch-999.pt - ln -s $PWD/$repo/data/lang_bpe_500 data/ - - ls -lh data - ls -lh transducer_stateless/exp - - log "Decoding test-clean and test-other" - - # use a small value for decoding with CPU - max_duration=100 - - for method in greedy_search fast_beam_search modified_beam_search; do - log "Decoding with $method" - - ./transducer_stateless/decode.py \ - --decoding-method $method \ - --epoch 999 \ - --avg 1 \ - --max-duration $max_duration \ - --exp-dir transducer_stateless/exp - done - - rm transducer_stateless/exp/*.pt -fi diff --git a/.github/scripts/run-pre-trained-transducer.sh b/.github/scripts/run-pre-trained-transducer.sh deleted file mode 100755 index b865f8d13..000000000 --- a/.github/scripts/run-pre-trained-transducer.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -set -e - -log() { - # This function is from espnet - local fname=${BASH_SOURCE[1]##*/} - echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" -} - -cd egs/librispeech/ASR - 
-repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-bpe-500-2021-12-23 - -log "Downloading pre-trained model from $repo_url" -git lfs install -git clone $repo_url -repo=$(basename $repo_url) - -log "Display test files" -tree $repo/ -ls -lh $repo/test_wavs/*.wav - -log "Beam search decoding" - -./transducer/pretrained.py \ - --method beam_search \ - --beam-size 4 \ - --checkpoint $repo/exp/pretrained.pt \ - --tokens $repo/data/lang_bpe_500/tokens.txt \ - $repo/test_wavs/1089-134686-0001.wav \ - $repo/test_wavs/1221-135766-0001.wav \ - $repo/test_wavs/1221-135766-0002.wav diff --git a/.github/workflows/aishell.yml b/.github/workflows/aishell.yml index 136e117bd..8b0599fca 100644 --- a/.github/workflows/aishell.yml +++ b/.github/workflows/aishell.yml @@ -11,22 +11,13 @@ on: workflow_dispatch: - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - concurrency: group: aishell-${{ github.ref }} cancel-in-progress: true jobs: generate_build_matrix: - if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'aishell' || github.event_name == 'schedule') + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'aishell') # see https://github.com/pytorch/pytorch/pull/50633 runs-on: ubuntu-latest diff --git a/.github/workflows/train-librispeech.yml b/.github/workflows/librispeech.yml similarity index 95% rename from .github/workflows/train-librispeech.yml rename to .github/workflows/librispeech.yml index 79002a881..6e087b10a 100644 --- a/.github/workflows/train-librispeech.yml +++ b/.github/workflows/librispeech.yml @@ -1,4 +1,4 @@ 
-name: train librispeech +name: librispeech on: push: branches: @@ -11,7 +11,7 @@ on: workflow_dispatch: concurrency: - group: train-librispeech-${{ github.ref }} + group: librispeech-${{ github.ref }} cancel-in-progress: true jobs: @@ -32,7 +32,7 @@ jobs: python ./.github/scripts/docker/generate_build_matrix.py MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py) echo "::set-output name=matrix::${MATRIX}" - train-librispeech: + librispeech: needs: generate_build_matrix name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }} runs-on: ubuntu-latest diff --git a/.github/workflows/run-librispeech-2022-03-12.yml b/.github/workflows/run-librispeech-2022-03-12.yml deleted file mode 100644 index f092e3c80..000000000 --- a/.github/workflows/run-librispeech-2022-03-12.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-2022-03-12 -# stateless transducer + k2 pruned rnnt-loss - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_03_12-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_03_12: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for 
LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh - - - name: Display decoding results for pruned_transducer_stateless - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless/exp - - cd pruned_transducer_stateless - echo "results for pruned_transducer_stateless" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n 
-k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for pruned_transducer_stateless - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless-2022-03-12 - path: egs/librispeech/ASR/pruned_transducer_stateless/exp/ diff --git a/.github/workflows/run-librispeech-2022-04-29.yml b/.github/workflows/run-librispeech-2022-04-29.yml deleted file mode 100644 index f8f4d9977..000000000 --- a/.github/workflows/run-librispeech-2022-04-29.yml +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-2022-04-29 -# stateless pruned transducer (reworked model) + giga speech - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_04_29-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_04_29: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: 
Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh - - .github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh - - - name: Display decoding results for pruned_transducer_stateless2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR - tree pruned_transducer_stateless2/exp - cd pruned_transducer_stateless2/exp - echo "===greedy search===" - find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find fast_beam_search -name "log-*" -exec grep -n 
--color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Display decoding results for pruned_transducer_stateless3 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR - tree pruned_transducer_stateless3/exp - cd pruned_transducer_stateless3/exp - echo "===greedy search===" - find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for pruned_transducer_stateless2 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless2-2022-04-29 - path: egs/librispeech/ASR/pruned_transducer_stateless2/exp/ - - - name: Upload decoding results for pruned_transducer_stateless3 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless3-2022-04-29 - path: 
egs/librispeech/ASR/pruned_transducer_stateless3/exp/ diff --git a/.github/workflows/run-librispeech-2022-05-13.yml b/.github/workflows/run-librispeech-2022-05-13.yml deleted file mode 100644 index dc20185da..000000000 --- a/.github/workflows/run-librispeech-2022-05-13.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-05-13 -# stateless transducer + k2 pruned rnnt-loss + deeper model - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_05_13-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_05_13: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: 
'**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export 
PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh - - - name: Display decoding results for librispeech pruned_transducer_stateless5 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless5/exp - - cd pruned_transducer_stateless5 - echo "results for pruned_transducer_stateless5" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech pruned_transducer_stateless5 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless5-2022-05-13 - path: egs/librispeech/ASR/pruned_transducer_stateless5/exp/ diff --git a/.github/workflows/run-librispeech-2022-11-11-stateless7.yml b/.github/workflows/run-librispeech-2022-11-11-stateless7.yml deleted file mode 100644 index 7e378c9a1..000000000 --- a/.github/workflows/run-librispeech-2022-11-11-stateless7.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 
2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-11-11-stateless7 -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_11_11_zipformer-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_11_11_zipformer: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: 
cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh - - - name: Display decoding results for librispeech pruned_transducer_stateless7 - if: github.event_name == 'schedule' || 
github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless7/exp - - cd pruned_transducer_stateless7 - echo "results for pruned_transducer_stateless7" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech pruned_transducer_stateless7 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-2022-11-11 - path: egs/librispeech/ASR/pruned_transducer_stateless7/exp/ diff --git a/.github/workflows/run-librispeech-2022-11-14-stateless8.yml b/.github/workflows/run-librispeech-2022-11-14-stateless8.yml deleted file mode 100644 index a2c1a0ad6..000000000 --- a/.github/workflows/run-librispeech-2022-11-14-stateless8.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-11-14-stateless8 -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_11_14_zipformer_stateless8-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_11_14_zipformer_stateless8: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and 
test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh - - - name: Display decoding results for librispeech pruned_transducer_stateless8 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless8/exp - - cd pruned_transducer_stateless8 - echo "results for pruned_transducer_stateless8" - echo "===greedy 
search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech pruned_transducer_stateless8 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless8-2022-11-14 - path: egs/librispeech/ASR/pruned_transducer_stateless8/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml b/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml deleted file mode 100644 index 500ab1736..000000000 --- a/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-12-01-stateless7-ctc -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -jobs: - run_librispeech_2022_11_11_zipformer: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for 
LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh - - - name: Display decoding results for librispeech pruned_transducer_stateless7_ctc - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless7_ctc/exp - - cd pruned_transducer_stateless7_ctc - echo "results for pruned_transducer_stateless7_ctc" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color 
"best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===ctc decoding===" - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===1best===" - find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-ctc-2022-12-01 - path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml b/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml deleted file mode 100644 index 1a7f9f594..000000000 --- a/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright 2022 Zengwei Yao - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-12-08-zipformer-mmi -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_12_08_zipformer-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_12_08_zipformer: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - 
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh - - - name: Display decoding results for librispeech zipformer-mmi - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./zipformer-mmi/exp - - cd zipformer-mmi - echo "results for zipformer-mmi" - echo "===1best===" - find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===nbest===" - find exp/nbest -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/nbest -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - 
echo "===nbest-rescoring-LG===" - find exp/nbest-rescoring-LG -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/nbest-rescoring-LG -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===nbest-rescoring-3-gram===" - find exp/nbest-rescoring-3-gram -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/nbest-rescoring-3-gram -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===nbest-rescoring-4-gram===" - find exp/nbest-rescoring-4-gram -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/nbest-rescoring-4-gram -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech zipformer-mmi - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer_mmi-2022-12-08 - path: egs/librispeech/ASR/zipformer_mmi/exp/ diff --git a/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml b/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml deleted file mode 100644 index 68014e20c..000000000 --- a/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-12-29-stateless7-streaming -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_12_29_zipformer_streaming-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_12_29_zipformer_streaming: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event.label.name == 'streaming-zipformer' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - 
.github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh - - - name: Display decoding results for librispeech pruned_transducer_stateless7_streaming - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree 
./pruned_transducer_stateless7_streaming/exp - - cd pruned_transducer_stateless7_streaming - echo "results for pruned_transducer_stateless7_streaming" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===streaming greedy search===" - find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===streaming fast_beam_search===" - find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===streaming modified beam search===" - find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - - name: Upload decoding results for librispeech pruned_transducer_stateless7_streaming - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version 
}}-ubuntu-latest-cpu-pruned_transducer_stateless7-streaming-2022-12-29 - path: egs/librispeech/ASR/pruned_transducer_stateless7_streaming/exp/ diff --git a/.github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml b/.github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml deleted file mode 100644 index 821abc25d..000000000 --- a/.github/workflows/run-librispeech-2023-01-29-stateless7-ctc-bs.yml +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-2023-01-29-stateless7-ctc-bs -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -jobs: - run_librispeech_2023_01_29_zipformer_ctc_bs: - if: github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - 
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2023-01-29.sh - - - name: Display decoding results for librispeech pruned_transducer_stateless7_ctc_bs - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless7_ctc_bs/exp - - cd pruned_transducer_stateless7_ctc_bs - echo "results for pruned_transducer_stateless7_ctc_bs" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - 
echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===ctc decoding===" - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===1best===" - find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc_bs - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless7-ctc-bs-2023-01-29 - path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/exp/ diff --git a/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml b/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml deleted file mode 100644 index 905515dc4..000000000 --- a/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-conformer-ctc3-2022-11-28 -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_11_28_conformer_ctc3-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_11_28_conformer_ctc3: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - 
run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh - - - name: Display decoding results for librispeech conformer_ctc3 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./conformer_ctc3/exp - - cd conformer_ctc3 - echo "results for conformer_ctc3" - echo "===ctc-decoding===" - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===1best===" - find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/1best -name "log-*" -exec grep -n --color "best for 
test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech conformer_ctc3 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-conformer_ctc3-2022-11-28 - path: egs/librispeech/ASR/conformer_ctc3/exp/ diff --git a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml deleted file mode 100644 index 3fb0920bc..000000000 --- a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-pruned-transducer-stateless3-2022-05-13 -# stateless pruned transducer (reworked model) + giga speech - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_pruned_transducer_stateless3_2022_05_13-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_pruned_transducer_stateless3_2022_05_13: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - 
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh - - - name: Display decoding results for pruned_transducer_stateless3 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR - tree pruned_transducer_stateless3/exp - cd pruned_transducer_stateless3/exp - echo "===greedy search===" - find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find fast_beam_search -name "log-*" -exec grep -n 
--color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for pruned_transducer_stateless3 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless3-2022-04-29 - path: egs/librispeech/ASR/pruned_transducer_stateless3/exp/ diff --git a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml deleted file mode 100644 index 67a6f6fc4..000000000 --- a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-streaming-2022-06-26 -# streaming conformer stateless transducer2 - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_streaming_2022_06_26-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_streaming_2022_06_26: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - 
name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh - - - name: Display decoding results - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./pruned_transducer_stateless2/exp - - cd pruned_transducer_stateless2 - echo "results for pruned_transducer_stateless2" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + 
| sort -n -k2 - - echo "===modified_beam_search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for pruned_transducer_stateless2 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-pruned_transducer_stateless2-2022-06-26 - path: egs/librispeech/ASR/pruned_transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml b/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml deleted file mode 100644 index 5145fb43c..000000000 --- a/.github/workflows/run-librispeech-streaming-zipformer-2023-05-18.yml +++ /dev/null @@ -1,174 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-streaming-zipformer-2023-05-18 -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2023_05_18_streaming_zipformer-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2023_05_18_streaming_zipformer: - if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - 
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh - - - name: Display decoding results for librispeech zipformer - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./zipformer/exp - - cd zipformer - - echo "results for zipformer, simulated streaming decoding" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search 
-name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "results for zipformer, chunk-wise streaming decoding" - echo "===greedy search===" - find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - - name: Upload decoding results for librispeech zipformer - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 - path: egs/librispeech/ASR/zipformer/exp/ diff --git a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml deleted file mode 100644 index 35ca08a31..000000000 --- a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the 
Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-2022-04-19 -# stateless transducer + torchaudio rnn-t loss - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2022_04_19-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2022_04_19: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash 
- run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh - - - name: Display decoding results - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./transducer_stateless2/exp - - cd transducer_stateless2 - echo "results for transducer_stateless2" 
- echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified_beam_search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for transducer_stateless2 - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless2-2022-04-19 - path: egs/librispeech/ASR/transducer_stateless2/exp/ diff --git a/.github/workflows/run-librispeech-zipformer-2023-05-18.yml b/.github/workflows/run-librispeech-zipformer-2023-05-18.yml deleted file mode 100644 index e9d235ad1..000000000 --- a/.github/workflows/run-librispeech-zipformer-2023-05-18.yml +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-librispeech-zipformer-2023-05-18 -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2023_05_18_zipformer-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2023_05_18_zipformer: - if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit 
!= 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-zipformer-2023-05-18.sh - - - name: Display decoding results for librispeech zipformer - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./zipformer/exp - - cd zipformer - echo "results for zipformer" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name 
"log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for librispeech zipformer - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 - path: egs/librispeech/ASR/zipformer/exp/ diff --git a/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml b/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml deleted file mode 100644 index 48f0b1532..000000000 --- a/.github/workflows/run-librispeech-zipformer-ctc-2023-06-14.yml +++ /dev/null @@ -1,155 +0,0 @@ -# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-librispeech-zipformer-ctc-2023-06-14 -# zipformer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_librispeech_2023_06_14_zipformer-ctc-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_librispeech_2023_06_14_zipformer_ctc: - if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - 
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh - - - name: Display decoding results for librispeech zipformer - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./zipformer/exp - - cd zipformer - echo "results for zipformer" - echo "===ctc-decoding===" - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===1best===" - find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n 
-k2 - - - name: Upload decoding results for librispeech zipformer - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 - path: egs/librispeech/ASR/zipformer/exp/ diff --git a/.github/workflows/run-pretrained-ctc.yml b/.github/workflows/run-pretrained-ctc.yml deleted file mode 100644 index 074a63dfc..000000000 --- a/.github/workflows/run-pretrained-ctc.yml +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-pre-trained-ctc - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - workflow_dispatch: - inputs: - test-run: - description: 'Test (y/n)?' 
- required: true - default: 'y' - -concurrency: - group: run_pre_trained_ctc-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_ctc: - if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event.inputs.test-run == 'y' || github.event.label.name == 'ctc' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Inference with pre-trained model - shell: bash - run: | - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - .github/scripts/run-pre-trained-ctc.sh diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml deleted file mode 100644 index f8caee8e5..000000000 --- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 
2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-pre-trained-trandsucer-stateless-multi-datasets-librispeech-100h - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: 
steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh - - - name: Display decoding results for transducer_stateless_multi_datasets - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - 
tree ./transducer_stateless_multi_datasets/exp - - cd transducer_stateless_multi_datasets - echo "results for transducer_stateless_multi_datasets" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for transducer_stateless_multi_datasets - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless_multi_datasets-100h-2022-02-21 - path: egs/librispeech/ASR/transducer_stateless_multi_datasets/exp/ diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml deleted file mode 100644 index 7c3910eb8..000000000 --- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-pre-trained-trandsucer-stateless-multi-datasets-librispeech-960h - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - 
.github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh - - - name: Display decoding results for transducer_stateless_multi_datasets - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./transducer_stateless_multi_datasets/exp - - cd 
transducer_stateless_multi_datasets - echo "results for transducer_stateless_multi_datasets" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for transducer_stateless_multi_datasets - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless_multi_datasets-100h-2022-03-01 - path: egs/librispeech/ASR/transducer_stateless_multi_datasets/exp/ diff --git a/.github/workflows/run-pretrained-transducer-stateless.yml b/.github/workflows/run-pretrained-transducer-stateless.yml deleted file mode 100644 index 1b69b97bf..000000000 --- a/.github/workflows/run-pretrained-transducer-stateless.yml +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: run-pre-trained-transducer-stateless - -on: - push: - branches: - - master - pull_request: - types: [labeled] - - schedule: - # minute (0-59) - # hour (0-23) - # day of the month (1-31) - # month (1-12) - # day of the week (0-6) - # nightly build at 15:50 UTC time every day - - cron: "50 15 * * *" - -concurrency: - group: run_pre_trained_transducer_stateless-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_transducer_stateless: - if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/install-kaldifeat.sh - - - name: Cache LibriSpeech test-clean and test-other datasets - id: 
libri-test-clean-and-test-other-data - uses: actions/cache@v2 - with: - path: | - ~/tmp/download - key: cache-libri-test-clean-and-test-other - - - name: Download LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh - - - name: Prepare manifests for LibriSpeech test-clean and test-other - shell: bash - run: | - .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh - - - name: Cache LibriSpeech test-clean and test-other fbank features - id: libri-test-clean-and-test-other-fbank - uses: actions/cache@v2 - with: - path: | - ~/tmp/fbank-libri - key: cache-libri-fbank-test-clean-and-test-other-v2 - - - name: Compute fbank for LibriSpeech test-clean and test-other - if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true' - shell: bash - run: | - .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh - - - name: Inference with pre-trained model - shell: bash - env: - GITHUB_EVENT_NAME: ${{ github.event_name }} - GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }} - run: | - mkdir -p egs/librispeech/ASR/data - ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank - ls -lh egs/librispeech/ASR/data/* - - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - - .github/scripts/run-pre-trained-transducer-stateless.sh - - - name: Display decoding results for transducer_stateless - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - shell: bash - run: | - cd egs/librispeech/ASR/ - tree ./transducer_stateless/exp - - cd transducer_stateless - echo "results for transducer_stateless" - echo "===greedy search===" - find exp/greedy_search -name "log-*" -exec grep -n --color "best for 
test-clean" {} + | sort -n -k2 - find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===fast_beam_search===" - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - echo "===modified beam search===" - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2 - find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - - - name: Upload decoding results for transducer_stateless - uses: actions/upload-artifact@v2 - if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' - with: - name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-transducer_stateless-2022-02-07 - path: egs/librispeech/ASR/transducer_stateless/exp/ diff --git a/.github/workflows/run-pretrained-transducer.yml b/.github/workflows/run-pretrained-transducer.yml deleted file mode 100644 index 91d87f1c9..000000000 --- a/.github/workflows/run-pretrained-transducer.yml +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com) - -# See ../../LICENSE for clarification regarding multiple authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: run-pre-trained-transducer - -on: - push: - branches: - - master - pull_request: - types: [labeled] - -concurrency: - group: run_pre_trained_transducer-${{ github.ref }} - cancel-in-progress: true - -jobs: - run_pre_trained_transducer: - if: github.event.label.name == 'ready' || github.event_name == 'push' - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python-version: [3.8] - - fail-fast: false - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: '**/requirements-ci.txt' - - - name: Install Python dependencies - run: | - grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install - pip uninstall -y protobuf - pip install --no-binary protobuf protobuf==3.20.* - - - name: Cache kaldifeat - id: my-cache - uses: actions/cache@v2 - with: - path: | - ~/tmp/kaldifeat - key: cache-tmp-${{ matrix.python-version }}-2023-05-22 - - - name: Install kaldifeat - if: steps.my-cache.outputs.cache-hit != 'true' - shell: bash - run: | - make -j2 _kaldifeat - - - name: Inference with pre-trained model - shell: bash - run: | - sudo apt-get -qq install git-lfs tree - export PYTHONPATH=$PWD:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH - export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH - .github/scripts/run-pre-trained-transducer.sh From f42258caf8a1c4d19428d98b808986522f630843 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sat, 30 Dec 2023 13:03:26 +0800 Subject: [PATCH 09/15] Update compute_fbank_commonvoice_splits.py (#1437) --- egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py b/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py index 0564f6ec6..f31b45aa5 100755 --- 
a/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py +++ b/egs/commonvoice/ASR/local/compute_fbank_commonvoice_splits.py @@ -109,10 +109,10 @@ def compute_fbank_commonvoice_splits(args): extractor = KaldifeatFbank(KaldifeatFbankConfig(device=device)) logging.info(f"device: {device}") - set_audio_duration_mismatch_tolerance(0.01) # 10ms tolerance + set_audio_duration_mismatch_tolerance(0.05) # 50ms tolerance set_caching_enabled(False) for i in range(start, stop): - idx = f"{i + 1}".zfill(num_digits) + idx = f"{i}".zfill(num_digits) logging.info(f"Processing {idx}/{num_splits}") cuts_path = output_dir / f"cv-{language}_cuts_{subset}.{idx}.jsonl.gz" From 8136ad775b6cd02bf2ecc60d65e8641b709c2d41 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 4 Jan 2024 13:59:32 +0800 Subject: [PATCH 10/15] Use high_freq -400 in computing fbank features. (#1447) See also https://github.com/k2-fsa/sherpa-onnx/issues/514 --- .../ASR/pruned_transducer_stateless2/pretrained.py | 1 + egs/aishell/ASR/conformer_ctc/pretrained.py | 1 + egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py | 1 + egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py | 1 + egs/aishell/ASR/pruned_transducer_stateless7/jit_pretrained.py | 1 + egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py | 1 + egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py | 1 + .../pruned_transducer_stateless7_streaming/streaming_decode.py | 1 + egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py | 1 + egs/aishell/ASR/transducer_stateless/pretrained.py | 1 + egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py | 1 + egs/aishell/ASR/transducer_stateless_modified/pretrained.py | 1 + egs/aishell/ASR/zipformer/streaming_decode.py | 1 + egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py | 1 + egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py | 1 + 
egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py | 1 + .../ASR/pruned_transducer_stateless7/onnx_pretrained.py | 1 + egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py | 1 + .../pruned_transducer_stateless7_streaming/streaming_decode.py | 1 + .../jit_trace_pretrained.py | 1 + egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py | 1 + .../pruned_transducer_stateless7_streaming/streaming_decode.py | 1 + egs/gigaspeech/ASR/zipformer/streaming_decode.py | 1 + egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py | 1 + .../ASR/conformer_ctc/jit_pretrained_decode_with_H.py | 1 + .../ASR/conformer_ctc/jit_pretrained_decode_with_HL.py | 1 + .../ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py | 1 + egs/librispeech/ASR/conformer_ctc/pretrained.py | 1 + egs/librispeech/ASR/conformer_ctc3/jit_pretrained.py | 1 + egs/librispeech/ASR/conformer_ctc3/pretrained.py | 1 + .../ASR/conv_emformer_transducer_stateless/streaming_decode.py | 1 + .../ASR/conv_emformer_transducer_stateless2/jit_pretrained.py | 1 + .../ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py | 1 + .../conv_emformer_transducer_stateless2/streaming-ncnn-decode.py | 1 + .../ASR/conv_emformer_transducer_stateless2/streaming_decode.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless/jit_pretrained.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py | 1 + .../ASR/lstm_transducer_stateless/streaming_decode.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless2/jit_pretrained.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless2/ncnn-decode.py | 1 + .../ASR/lstm_transducer_stateless2/onnx_pretrained.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py | 1 + .../ASR/lstm_transducer_stateless2/streaming-ncnn-decode.py | 1 + .../ASR/lstm_transducer_stateless2/streaming-onnx-decode.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless3/jit_pretrained.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py | 1 + 
.../ASR/lstm_transducer_stateless3/streaming_decode.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py | 1 + .../ASR/pruned_transducer_stateless/streaming_decode.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py | 1 + .../ASR/pruned_transducer_stateless2/streaming_decode.py | 1 + .../ASR/pruned_transducer_stateless3/jit_pretrained.py | 1 + .../ASR/pruned_transducer_stateless3/onnx_pretrained.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py | 1 + .../ASR/pruned_transducer_stateless3/streaming_decode.py | 1 + .../ASR/pruned_transducer_stateless4/streaming_decode.py | 1 + .../pruned_transducer_stateless5/onnx_pretrained-streaming.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py | 1 + .../ASR/pruned_transducer_stateless5/streaming_decode.py | 1 + .../ASR/pruned_transducer_stateless7/jit_pretrained.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_ctc/jit_pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py | 1 + .../ASR/pruned_transducer_stateless7_ctc/pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py | 1 + .../ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained.py | 1 + .../pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py | 1 + .../ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py | 1 + .../ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py | 1 + .../jit_trace_pretrained.py | 1 + .../pruned_transducer_stateless7_streaming/onnx_pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_streaming/pretrained.py | 1 + .../streaming-ncnn-decode.py | 1 + .../pruned_transducer_stateless7_streaming/streaming_decode.py | 1 + .../ASR/pruned_transducer_stateless8/jit_pretrained.py | 1 + 
egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py | 1 + egs/librispeech/ASR/tdnn_lstm_ctc/pretrained.py | 1 + egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained.py | 1 + egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained_ctc.py | 1 + egs/librispeech/ASR/tiny_transducer_ctc/pretrained.py | 1 + egs/librispeech/ASR/tiny_transducer_ctc/pretrained_ctc.py | 1 + egs/librispeech/ASR/transducer/pretrained.py | 1 + egs/librispeech/ASR/transducer_stateless/pretrained.py | 1 + egs/librispeech/ASR/transducer_stateless2/pretrained.py | 1 + .../ASR/transducer_stateless_multi_datasets/pretrained.py | 1 + egs/librispeech/ASR/zipformer/jit_pretrained.py | 1 + egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py | 1 + egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained-streaming-ctc.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained_ctc.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_H.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HL.py | 1 + egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HLG.py | 1 + egs/librispeech/ASR/zipformer/pretrained.py | 1 + egs/librispeech/ASR/zipformer/pretrained_ctc.py | 1 + egs/librispeech/ASR/zipformer_mmi/jit_pretrained.py | 1 + egs/librispeech/ASR/zipformer_mmi/pretrained.py | 1 + egs/mgb2/ASR/conformer_ctc/pretrained.py | 1 + egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py | 1 + egs/multi_zh-hans/ASR/zipformer/pretrained.py | 1 + egs/multi_zh_en/ASR/zipformer/pretrained.py | 1 + egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py | 1 + .../ASR/pruned_transducer_stateless7_bbpe/pretrained.py | 1 + egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py | 1 + egs/tedlium3/ASR/transducer_stateless/pretrained.py | 1 + 
egs/timit/ASR/tdnn_ligru_ctc/pretrained.py | 1 + egs/timit/ASR/tdnn_lstm_ctc/pretrained.py | 1 + .../ASR/pruned_transducer_stateless2/jit_pretrained.py | 1 + egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py | 1 + .../pruned_transducer_stateless5/onnx_pretrained-streaming.py | 1 + .../ASR/pruned_transducer_stateless5/onnx_pretrained.py | 1 + egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py | 1 + .../ASR/pruned_transducer_stateless5/streaming_decode.py | 1 + egs/wenetspeech/ASR/zipformer/streaming_decode.py | 1 + egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py | 1 + egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py | 1 + egs/yesno/ASR/tdnn/jit_pretrained.py | 1 + egs/yesno/ASR/tdnn/jit_pretrained_decode_with_H.py | 1 + egs/yesno/ASR/tdnn/jit_pretrained_decode_with_HL.py | 1 + egs/yesno/ASR/tdnn/onnx_pretrained.py | 1 + egs/yesno/ASR/tdnn/pretrained.py | 1 + 127 files changed, 127 insertions(+) diff --git a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py index 75c316eaf..17729e02e 100644 --- a/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2/pretrained.py @@ -242,6 +242,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/conformer_ctc/pretrained.py b/egs/aishell/ASR/conformer_ctc/pretrained.py index 66d583396..af1171a6f 100755 --- a/egs/aishell/ASR/conformer_ctc/pretrained.py +++ b/egs/aishell/ASR/conformer_ctc/pretrained.py @@ -261,6 +261,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git 
a/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py index 82c10f129..c4aa98358 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless2/pretrained.py @@ -240,6 +240,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py index ead393e6e..69fe3a40b 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless3/pretrained.py @@ -241,6 +241,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/jit_pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7/jit_pretrained.py index e61190649..5143f2cae 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/jit_pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/jit_pretrained.py @@ -230,6 +230,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py index a92182e8d..8e8e971eb 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/onnx_pretrained.py @@ -369,6 +369,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + 
opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py index 0c43bf74b..8fb7ac278 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py @@ -227,6 +227,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py index ea5bda4db..12004315b 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_bbpe/pretrained.py @@ -250,6 +250,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index 6b4f183cf..aa0e07c83 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -317,6 +317,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py b/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py index 7e7213501..9754b4939 100644 --- a/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py +++ b/egs/aishell/ASR/tdnn_lstm_ctc/pretrained.py @@ -158,6 +158,7 @@ def main(): opts.frame_opts.snip_edges = False 
opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/transducer_stateless/pretrained.py b/egs/aishell/ASR/transducer_stateless/pretrained.py index 40f430e13..540e7b61b 100755 --- a/egs/aishell/ASR/transducer_stateless/pretrained.py +++ b/egs/aishell/ASR/transducer_stateless/pretrained.py @@ -258,6 +258,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py b/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py index 5d8ca2e11..4a4e9237c 100755 --- a/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py +++ b/egs/aishell/ASR/transducer_stateless_modified-2/pretrained.py @@ -238,6 +238,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/transducer_stateless_modified/pretrained.py b/egs/aishell/ASR/transducer_stateless_modified/pretrained.py index 9e4459247..66a91709e 100755 --- a/egs/aishell/ASR/transducer_stateless_modified/pretrained.py +++ b/egs/aishell/ASR/transducer_stateless_modified/pretrained.py @@ -238,6 +238,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell/ASR/zipformer/streaming_decode.py b/egs/aishell/ASR/zipformer/streaming_decode.py index c3820447a..f54ffbd3c 100755 --- a/egs/aishell/ASR/zipformer/streaming_decode.py +++ b/egs/aishell/ASR/zipformer/streaming_decode.py @@ -572,6 +572,7 @@ def decode_dataset( 
opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 100 diff --git a/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py b/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py index bc3ae7abf..f04632388 100755 --- a/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/aishell2/ASR/pruned_transducer_stateless5/pretrained.py @@ -239,6 +239,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py b/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py index ee898c303..e8b7f71b7 100755 --- a/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/aishell4/ASR/pruned_transducer_stateless5/pretrained.py @@ -251,6 +251,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py b/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py index f5a0dd8c8..a738bb3fb 100644 --- a/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/alimeeting/ASR/pruned_transducer_stateless2/pretrained.py @@ -242,6 +242,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py index cf6ddfa36..52fed7331 100755 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py +++ 
b/egs/commonvoice/ASR/pruned_transducer_stateless7/onnx_pretrained.py @@ -370,6 +370,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py index a22d1b4ba..b6e2451e8 100755 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index dbe65d0a7..018736d26 100755 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -320,6 +320,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py index d84cf04a3..58ee99e6a 100644 --- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py +++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py @@ -177,6 +177,7 @@ def create_streaming_feature_extractor(sample_rate) -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git 
a/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py index 932026868..66fbae378 100644 --- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py +++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/pretrained.py @@ -252,6 +252,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index 9700dd89e..7252665a7 100755 --- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -337,6 +337,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/gigaspeech/ASR/zipformer/streaming_decode.py b/egs/gigaspeech/ASR/zipformer/streaming_decode.py index a76788859..09df2935c 100755 --- a/egs/gigaspeech/ASR/zipformer/streaming_decode.py +++ b/egs/gigaspeech/ASR/zipformer/streaming_decode.py @@ -553,6 +553,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 100 diff --git a/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py b/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py index 48fd2612a..458109a3f 100644 --- a/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py +++ b/egs/libriheavy/ASR/zipformer_prompt_asr/pretrained.py @@ -264,6 +264,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = 
kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py b/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py index 4bdec9e11..e9acf7e0b 100755 --- a/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py +++ b/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py @@ -195,6 +195,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py b/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py index d5a1dba3c..5753aa5d3 100755 --- a/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py +++ b/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py @@ -192,6 +192,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py b/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py index 216677a23..b6e3333ce 100755 --- a/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py +++ b/egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HLG.py @@ -191,6 +191,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conformer_ctc/pretrained.py b/egs/librispeech/ASR/conformer_ctc/pretrained.py index df3e4d819..38b60fcb9 100755 --- a/egs/librispeech/ASR/conformer_ctc/pretrained.py +++ b/egs/librispeech/ASR/conformer_ctc/pretrained.py @@ -283,6 +283,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = 
params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conformer_ctc3/jit_pretrained.py b/egs/librispeech/ASR/conformer_ctc3/jit_pretrained.py index 76db46cc8..19b26361e 100755 --- a/egs/librispeech/ASR/conformer_ctc3/jit_pretrained.py +++ b/egs/librispeech/ASR/conformer_ctc3/jit_pretrained.py @@ -271,6 +271,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conformer_ctc3/pretrained.py b/egs/librispeech/ASR/conformer_ctc3/pretrained.py index c37b99cce..a0cdfcf03 100755 --- a/egs/librispeech/ASR/conformer_ctc3/pretrained.py +++ b/egs/librispeech/ASR/conformer_ctc3/pretrained.py @@ -302,6 +302,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py index e5a7c7116..9b8b4cce2 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py @@ -623,6 +623,7 @@ def create_streaming_feature_extractor() -> Fbank: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return Fbank(opts) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/jit_pretrained.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/jit_pretrained.py index 1fe358c79..58f587c91 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/jit_pretrained.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/jit_pretrained.py @@ 
-184,6 +184,7 @@ def create_streaming_feature_extractor(sample_rate) -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py index a6c69d54f..c8aae04e8 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/onnx_pretrained.py @@ -326,6 +326,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming-ncnn-decode.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming-ncnn-decode.py index 74da9e6c8..1047100fc 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming-ncnn-decode.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming-ncnn-decode.py @@ -276,6 +276,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py index f5d894a7b..aaed7d31f 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py @@ -623,6 +623,7 @@ def create_streaming_feature_extractor() -> Fbank: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + 
opts.mel_opts.high_freq = -400 return Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless/jit_pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless/jit_pretrained.py index c07956243..5350a54da 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless/jit_pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless/jit_pretrained.py @@ -266,6 +266,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py index 119fcf1fd..42c3a5d7f 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless/pretrained.py @@ -251,6 +251,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py b/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py index f989d9bc0..03472e2c3 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py @@ -615,6 +615,7 @@ def create_streaming_feature_extractor() -> Fbank: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/jit_pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless2/jit_pretrained.py index 728b09104..f4ec17221 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/jit_pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/jit_pretrained.py @@ -267,6 +267,7 @@ def main(): 
opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/ncnn-decode.py b/egs/librispeech/ASR/lstm_transducer_stateless2/ncnn-decode.py index 3eeaa5397..5bab70fb0 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/ncnn-decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/ncnn-decode.py @@ -255,6 +255,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py index 06159e56a..06397965d 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/onnx_pretrained.py @@ -298,6 +298,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py index 5d6d97320..dcff088e2 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/pretrained.py @@ -254,6 +254,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-ncnn-decode.py b/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-ncnn-decode.py index cbbc77928..6166049ae 100755 --- 
a/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-ncnn-decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-ncnn-decode.py @@ -217,6 +217,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py b/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py index 487fc2114..df9f6cf3f 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/streaming-onnx-decode.py @@ -344,6 +344,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/jit_pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless3/jit_pretrained.py index 237591a36..d9e7f3578 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/jit_pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/jit_pretrained.py @@ -266,6 +266,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py b/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py index 29a0d4d1a..e39637bd8 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/pretrained.py @@ -252,6 +252,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = 
kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py b/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py index c737e3611..c425b1f46 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py @@ -615,6 +615,7 @@ def create_streaming_feature_extractor() -> Fbank: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py index 02f9f1b03..e06404619 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/pretrained.py @@ -277,6 +277,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py index f4b01fd06..8586c66d6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py @@ -334,6 +334,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 100 diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py index 029f55ba0..6923f4d40 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/pretrained.py @@ -278,6 +278,7 @@ def main(): opts.frame_opts.snip_edges = 
False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py index 9c4a13606..d17c3467a 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py @@ -336,6 +336,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless3/jit_pretrained.py index 0669284b3..6d09de6bd 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/jit_pretrained.py @@ -285,6 +285,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py index de3e03da6..8d12eae28 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/onnx_pretrained.py @@ -368,6 +368,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py index abda4e2d4..05e6a6fba 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py 
+++ b/egs/librispeech/ASR/pruned_transducer_stateless3/pretrained.py @@ -287,6 +287,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py index e7c1affc2..5e1acd735 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py @@ -337,6 +337,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py index e966aa4b1..229b52e5b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py @@ -353,6 +353,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py b/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py index 6e290e799..2432c6010 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py @@ -326,6 +326,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git 
a/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py index 304fa8693..a9ce75a7b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/pretrained.py @@ -251,6 +251,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py index f65f47fc2..8478a65fb 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py @@ -353,6 +353,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7/jit_pretrained.py index 5af6dae25..88a05e09d 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/jit_pretrained.py @@ -225,6 +225,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py index 86c922cda..4bf11ac24 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate 
opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained.py index 280b95984..83dc29324 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained.py @@ -224,6 +224,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py index d50d231d5..d1b7eec65 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py @@ -280,6 +280,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py index 78e0fa778..323ba2642 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py index 904c1deae..1e638aa7d 100755 --- 
a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc/pretrained_ctc.py @@ -298,6 +298,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained.py index da2c6a39a..a39fdee54 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained.py @@ -224,6 +224,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py index 653c25e06..80604ef4a 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py @@ -280,6 +280,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py index 494a34d97..0ff110370 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/onnx_pretrained.py @@ -381,6 +381,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate 
opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py index 5d240cf30..a82f3562b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py index 914107526..b98756a54 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py @@ -298,6 +298,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py index c8301b2da..7116b10fb 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_pretrained.py @@ -231,6 +231,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py index f2ac1914d..d714670cf 
100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/jit_trace_pretrained.py @@ -186,6 +186,7 @@ def create_streaming_feature_extractor(sample_rate) -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py index 04861ea37..298d1889b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/onnx_pretrained.py @@ -382,6 +382,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py index bc42e8d05..aa2dd17fb 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py index 883fdcbdd..999f7e0b4 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py +++ 
b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py @@ -335,6 +335,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index a0f54b6e1..e27fb4e63 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -320,6 +320,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 50 diff --git a/egs/librispeech/ASR/pruned_transducer_stateless8/jit_pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless8/jit_pretrained.py index 129497d5a..3ce2953c3 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless8/jit_pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless8/jit_pretrained.py @@ -225,6 +225,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py b/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py index 64b38c9d5..c29b8d8c9 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless8/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git 
a/egs/librispeech/ASR/tdnn_lstm_ctc/pretrained.py b/egs/librispeech/ASR/tdnn_lstm_ctc/pretrained.py index fde724866..b3dfab64a 100755 --- a/egs/librispeech/ASR/tdnn_lstm_ctc/pretrained.py +++ b/egs/librispeech/ASR/tdnn_lstm_ctc/pretrained.py @@ -196,6 +196,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained.py b/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained.py index 3888d3544..0cd876551 100755 --- a/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained.py +++ b/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained.py @@ -224,6 +224,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained_ctc.py b/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained_ctc.py index 6f2cbaabd..92dea3aa1 100755 --- a/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/tiny_transducer_ctc/jit_pretrained_ctc.py @@ -280,6 +280,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/tiny_transducer_ctc/pretrained.py b/egs/librispeech/ASR/tiny_transducer_ctc/pretrained.py index 981039b8f..5c6956324 100755 --- a/egs/librispeech/ASR/tiny_transducer_ctc/pretrained.py +++ b/egs/librispeech/ASR/tiny_transducer_ctc/pretrained.py @@ -262,6 +262,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git 
a/egs/librispeech/ASR/tiny_transducer_ctc/pretrained_ctc.py b/egs/librispeech/ASR/tiny_transducer_ctc/pretrained_ctc.py index a06d6d684..7698ada79 100755 --- a/egs/librispeech/ASR/tiny_transducer_ctc/pretrained_ctc.py +++ b/egs/librispeech/ASR/tiny_transducer_ctc/pretrained_ctc.py @@ -298,6 +298,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/transducer/pretrained.py b/egs/librispeech/ASR/transducer/pretrained.py index c2413f5de..4d9bbf4b1 100755 --- a/egs/librispeech/ASR/transducer/pretrained.py +++ b/egs/librispeech/ASR/transducer/pretrained.py @@ -235,6 +235,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/transducer_stateless/pretrained.py b/egs/librispeech/ASR/transducer_stateless/pretrained.py index 5898dd0f5..3b86e319e 100755 --- a/egs/librispeech/ASR/transducer_stateless/pretrained.py +++ b/egs/librispeech/ASR/transducer_stateless/pretrained.py @@ -247,6 +247,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/transducer_stateless2/pretrained.py b/egs/librispeech/ASR/transducer_stateless2/pretrained.py index b69b347ef..2de4182f1 100755 --- a/egs/librispeech/ASR/transducer_stateless2/pretrained.py +++ b/egs/librispeech/ASR/transducer_stateless2/pretrained.py @@ -247,6 +247,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git 
a/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py b/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py index 4f29d6f1f..83094ea51 100755 --- a/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py +++ b/egs/librispeech/ASR/transducer_stateless_multi_datasets/pretrained.py @@ -247,6 +247,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained.py b/egs/librispeech/ASR/zipformer/jit_pretrained.py index a41fbc1c9..52dfd3fb6 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained.py @@ -222,6 +222,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py index 660a4bfc6..fcd07ae34 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained_ctc.py @@ -285,6 +285,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py b/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py index d4ceacefd..eade5a854 100755 --- a/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py +++ b/egs/librispeech/ASR/zipformer/jit_pretrained_streaming.py @@ -167,6 +167,7 @@ def create_streaming_feature_extractor(sample_rate) -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = 
-400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming-ctc.py b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming-ctc.py index 44546cae5..dd47c0eb6 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming-ctc.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming-ctc.py @@ -318,6 +318,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py index e7c4f40ee..e011c4b24 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained-streaming.py @@ -413,6 +413,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained.py b/egs/librispeech/ASR/zipformer/onnx_pretrained.py index 334376093..662392b5f 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained.py @@ -369,6 +369,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc.py b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc.py index eb5cee9cd..ecca758f2 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc.py @@ -161,6 +161,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + 
opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_H.py b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_H.py index 683a7dc20..a77c3bf2a 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_H.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_H.py @@ -225,6 +225,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 logging.info(f"Loading H from {args.H}") H = kaldifst.StdVectorFst.read(args.H) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HL.py b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HL.py index 0b94bfa65..6ef944514 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HL.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HL.py @@ -223,6 +223,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 logging.info(f"Loading HL from {args.HL}") HL = kaldifst.StdVectorFst.read(args.HL) diff --git a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HLG.py b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HLG.py index 93569142a..ccb3107ea 100755 --- a/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HLG.py +++ b/egs/librispeech/ASR/zipformer/onnx_pretrained_ctc_HLG.py @@ -223,6 +223,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 logging.info(f"Loading HLG from {args.HLG}") HLG = kaldifst.StdVectorFst.read(args.HLG) diff --git a/egs/librispeech/ASR/zipformer/pretrained.py b/egs/librispeech/ASR/zipformer/pretrained.py index 3104b6084..de0652893 100755 --- a/egs/librispeech/ASR/zipformer/pretrained.py +++ b/egs/librispeech/ASR/zipformer/pretrained.py @@ -303,6 +303,7 @@ def main(): opts.frame_opts.snip_edges = False 
opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer/pretrained_ctc.py b/egs/librispeech/ASR/zipformer/pretrained_ctc.py index 9dff2e6fc..408d13576 100755 --- a/egs/librispeech/ASR/zipformer/pretrained_ctc.py +++ b/egs/librispeech/ASR/zipformer/pretrained_ctc.py @@ -304,6 +304,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer_mmi/jit_pretrained.py b/egs/librispeech/ASR/zipformer_mmi/jit_pretrained.py index c9ef16ffa..6990c90a0 100755 --- a/egs/librispeech/ASR/zipformer_mmi/jit_pretrained.py +++ b/egs/librispeech/ASR/zipformer_mmi/jit_pretrained.py @@ -259,6 +259,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/librispeech/ASR/zipformer_mmi/pretrained.py b/egs/librispeech/ASR/zipformer_mmi/pretrained.py index 3ba4da5dd..1e7afc777 100755 --- a/egs/librispeech/ASR/zipformer_mmi/pretrained.py +++ b/egs/librispeech/ASR/zipformer_mmi/pretrained.py @@ -282,6 +282,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/mgb2/ASR/conformer_ctc/pretrained.py b/egs/mgb2/ASR/conformer_ctc/pretrained.py index d30ca98d8..0ab2af527 100755 --- a/egs/mgb2/ASR/conformer_ctc/pretrained.py +++ b/egs/mgb2/ASR/conformer_ctc/pretrained.py @@ -287,6 +287,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = 
kaldifeat.Fbank(opts) diff --git a/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py b/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py index 77ba0873b..81a16f0ff 100755 --- a/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/mgb2/ASR/pruned_transducer_stateless5/pretrained.py @@ -249,6 +249,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/multi_zh-hans/ASR/zipformer/pretrained.py b/egs/multi_zh-hans/ASR/zipformer/pretrained.py index 69ff382da..c15db11f7 100755 --- a/egs/multi_zh-hans/ASR/zipformer/pretrained.py +++ b/egs/multi_zh-hans/ASR/zipformer/pretrained.py @@ -303,6 +303,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/multi_zh_en/ASR/zipformer/pretrained.py b/egs/multi_zh_en/ASR/zipformer/pretrained.py index 676272e1f..2fcde550b 100755 --- a/egs/multi_zh_en/ASR/zipformer/pretrained.py +++ b/egs/multi_zh_en/ASR/zipformer/pretrained.py @@ -306,6 +306,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py b/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py index 3305f5bd3..8a74ee745 100755 --- a/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/tal_csasr/ASR/pruned_transducer_stateless5/pretrained.py @@ -248,6 +248,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git 
a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py index a23e2a04f..8c966a2f6 100755 --- a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py +++ b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/jit_pretrained.py @@ -226,6 +226,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py index f365986f6..6e07b5949 100755 --- a/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py +++ b/egs/tal_csasr/ASR/pruned_transducer_stateless7_bbpe/pretrained.py @@ -261,6 +261,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py b/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py index 8a89c3578..9e58fed00 100644 --- a/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py +++ b/egs/tedlium3/ASR/pruned_transducer_stateless/pretrained.py @@ -256,6 +256,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/tedlium3/ASR/transducer_stateless/pretrained.py b/egs/tedlium3/ASR/transducer_stateless/pretrained.py index 81afd6a4e..5300fe764 100644 --- a/egs/tedlium3/ASR/transducer_stateless/pretrained.py +++ b/egs/tedlium3/ASR/transducer_stateless/pretrained.py @@ -270,6 +270,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = 
params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py b/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py index 3fdf3b855..0d77bc512 100644 --- a/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py +++ b/egs/timit/ASR/tdnn_ligru_ctc/pretrained.py @@ -196,6 +196,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py b/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py index 98c746ce5..f06c8c211 100644 --- a/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py +++ b/egs/timit/ASR/tdnn_lstm_ctc/pretrained.py @@ -196,6 +196,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/jit_pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/jit_pretrained.py index f90dd2b43..aee1a2175 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/jit_pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/jit_pretrained.py @@ -285,6 +285,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py index c3d67ad92..642de72d7 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/pretrained.py @@ -238,6 +238,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + 
opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py index c31db6859..cca26feb0 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained-streaming.py @@ -327,6 +327,7 @@ def create_streaming_feature_extractor() -> OnlineFeature: opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 return OnlineFbank(opts) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py index c784853ee..4b4ddd332 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/onnx_pretrained.py @@ -376,6 +376,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = args.sample_rate opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py index 1cac20435..17428e19d 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/pretrained.py @@ -238,6 +238,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py index 3a4dc3cb8..27a9b1714 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py 
+++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py @@ -378,6 +378,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 100 diff --git a/egs/wenetspeech/ASR/zipformer/streaming_decode.py b/egs/wenetspeech/ASR/zipformer/streaming_decode.py index 94c5fae5f..96f339b07 100755 --- a/egs/wenetspeech/ASR/zipformer/streaming_decode.py +++ b/egs/wenetspeech/ASR/zipformer/streaming_decode.py @@ -572,6 +572,7 @@ def decode_dataset( opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = 16000 opts.mel_opts.num_bins = 80 + opts.mel_opts.high_freq = -400 log_interval = 100 diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py index 74a2210c3..2c106c4cb 100755 --- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py +++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless5/pretrained.py @@ -249,6 +249,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py index d05bafcfb..6995ff2ff 100755 --- a/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py +++ b/egs/xbmu_amdo31/ASR/pruned_transducer_stateless7/pretrained.py @@ -260,6 +260,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/yesno/ASR/tdnn/jit_pretrained.py b/egs/yesno/ASR/tdnn/jit_pretrained.py index 7581ecb83..e29415ffb 100755 --- a/egs/yesno/ASR/tdnn/jit_pretrained.py +++ b/egs/yesno/ASR/tdnn/jit_pretrained.py @@ 
-142,6 +142,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_H.py b/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_H.py index ff8c742af..72127aebd 100755 --- a/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_H.py +++ b/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_H.py @@ -164,6 +164,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 23 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_HL.py b/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_HL.py index 05ba74f9a..f8a057336 100755 --- a/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_HL.py +++ b/egs/yesno/ASR/tdnn/jit_pretrained_decode_with_HL.py @@ -163,6 +163,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = sample_rate opts.mel_opts.num_bins = 23 + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/yesno/ASR/tdnn/onnx_pretrained.py b/egs/yesno/ASR/tdnn/onnx_pretrained.py index 72a1d69c8..968a9e9a8 100755 --- a/egs/yesno/ASR/tdnn/onnx_pretrained.py +++ b/egs/yesno/ASR/tdnn/onnx_pretrained.py @@ -186,6 +186,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) diff --git a/egs/yesno/ASR/tdnn/pretrained.py b/egs/yesno/ASR/tdnn/pretrained.py index 987c49de6..bea520998 100755 --- a/egs/yesno/ASR/tdnn/pretrained.py +++ b/egs/yesno/ASR/tdnn/pretrained.py @@ -164,6 +164,7 @@ def main(): opts.frame_opts.snip_edges = False opts.frame_opts.samp_freq = params.sample_rate opts.mel_opts.num_bins = params.feature_dim + opts.mel_opts.high_freq = -400 fbank = kaldifeat.Fbank(opts) 
From 716b82cc3ada9e39254acc93465ed85e53d05670 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Fri, 5 Jan 2024 03:21:27 +0100 Subject: [PATCH 11/15] streaming_decode.py, relax the audio range from [-1,+1] to [-10,+10] (#1448) - some AudioTransform classes produce audio signals out of range [-1,+1] - Resample produced 1.0079 - The range [-10,+10] was chosen to still be able to reliably distinguish from the [-32k,+32k] signal... - this is related to : https://github.com/lhotse-speech/lhotse/issues/1254 --- .../streaming_decode.py | 7 ++++++- egs/aishell/ASR/zipformer/streaming_decode.py | 12 ++++++------ .../streaming_decode.py | 7 ++++++- egs/gigaspeech/ASR/zipformer/streaming_decode.py | 7 ++++++- .../streaming_decode.py | 8 +++++++- .../streaming_decode.py | 8 +++++++- .../lstm_transducer_stateless/streaming_decode.py | 8 +++++++- .../lstm_transducer_stateless3/streaming_decode.py | 8 +++++++- .../pruned_transducer_stateless/streaming_decode.py | 7 ++++++- .../pruned_transducer_stateless2/streaming_decode.py | 7 ++++++- .../pruned_transducer_stateless3/streaming_decode.py | 7 ++++++- .../pruned_transducer_stateless4/streaming_decode.py | 7 ++++++- .../pruned_transducer_stateless5/streaming_decode.py | 7 ++++++- .../streaming_decode.py | 7 ++++++- .../streaming_decode.py | 7 ++++++- egs/librispeech/ASR/zipformer/streaming_decode.py | 7 ++++++- .../pruned_transducer_stateless5/streaming_decode.py | 8 ++++++++ egs/wenetspeech/ASR/zipformer/streaming_decode.py | 12 ++++++------ 18 files changed, 114 insertions(+), 27 deletions(-) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index aa0e07c83..a4b5cd588 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -342,7 +342,12 @@ def decode_dataset( assert audio.dtype == np.float32, 
audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/aishell/ASR/zipformer/streaming_decode.py b/egs/aishell/ASR/zipformer/streaming_decode.py index f54ffbd3c..6a7ef2750 100755 --- a/egs/aishell/ASR/zipformer/streaming_decode.py +++ b/egs/aishell/ASR/zipformer/streaming_decode.py @@ -597,12 +597,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - if audio.max() > 1: - logging.warning( - f"The audio should be normalized to [-1, 1], audio.max : {audio.max()}." - f"Clipping to [-1, 1]." - ) - audio = np.clip(audio, -1, 1) + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index 7252665a7..6a249dd3f 100755 --- a/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/csj/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -362,7 +362,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... 
+ # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/gigaspeech/ASR/zipformer/streaming_decode.py b/egs/gigaspeech/ASR/zipformer/streaming_decode.py index 09df2935c..7cada8c9d 100755 --- a/egs/gigaspeech/ASR/zipformer/streaming_decode.py +++ b/egs/gigaspeech/ASR/zipformer/streaming_decode.py @@ -578,7 +578,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py index 9b8b4cce2..12953c74c 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless/streaming_decode.py @@ -681,8 +681,14 @@ def decode_dataset( assert len(audio.shape) == 2 assert audio.shape[0] == 1, "Should be single channel" assert audio.dtype == np.float32, audio.dtype + # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
samples = torch.from_numpy(audio).squeeze(0) feature = fbank(samples) diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py index aaed7d31f..ddc7dbef1 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/streaming_decode.py @@ -681,8 +681,14 @@ def decode_dataset( assert len(audio.shape) == 2 assert audio.shape[0] == 1, "Should be single channel" assert audio.dtype == np.float32, audio.dtype + # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) feature = fbank(samples) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py b/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py index 03472e2c3..14cb0fdfe 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless/streaming_decode.py @@ -673,8 +673,14 @@ def decode_dataset( assert len(audio.shape) == 2 assert audio.shape[0] == 1, "Should be single channel" assert audio.dtype == np.float32, audio.dtype + # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
samples = torch.from_numpy(audio).squeeze(0) feature = fbank(samples) diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py b/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py index c425b1f46..f57bdea67 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/streaming_decode.py @@ -673,8 +673,14 @@ def decode_dataset( assert len(audio.shape) == 2 assert audio.shape[0] == 1, "Should be single channel" assert audio.dtype == np.float32, audio.dtype + # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) feature = fbank(samples) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py index 8586c66d6..4726d9fad 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/streaming_decode.py @@ -359,7 +359,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py index d17c3467a..381561359 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless2/streaming_decode.py @@ -361,7 +361,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py index 5e1acd735..9113cfaa9 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/streaming_decode.py @@ -362,7 +362,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py index 229b52e5b..f205ad42f 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless4/streaming_decode.py @@ -378,7 +378,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py index 8478a65fb..1d980f10e 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/streaming_decode.py @@ -378,7 +378,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py index e27fb4e63..0961e0d7b 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/streaming_decode.py @@ -345,7 +345,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/streaming_decode.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/streaming_decode.py index 2904f086c..cc2787d76 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/streaming_decode.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/streaming_decode.py @@ -345,7 +345,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/librispeech/ASR/zipformer/streaming_decode.py b/egs/librispeech/ASR/zipformer/streaming_decode.py index 904caf8af..8087c1460 100755 --- a/egs/librispeech/ASR/zipformer/streaming_decode.py +++ b/egs/librispeech/ASR/zipformer/streaming_decode.py @@ -577,7 +577,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - assert audio.max() <= 1, "Should be normalized to [-1, 1])" + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py index 27a9b1714..b396aa9b8 100644 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/streaming_decode.py @@ -402,6 +402,14 @@ def decode_dataset( assert audio.shape[0] == 1, "Should be single channel" assert audio.dtype == np.float32, audio.dtype + # The trained model is using normalized samples + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." 
+ samples = torch.from_numpy(audio).squeeze(0) fbank = Fbank(opts) diff --git a/egs/wenetspeech/ASR/zipformer/streaming_decode.py b/egs/wenetspeech/ASR/zipformer/streaming_decode.py index 96f339b07..cb2cf7d35 100755 --- a/egs/wenetspeech/ASR/zipformer/streaming_decode.py +++ b/egs/wenetspeech/ASR/zipformer/streaming_decode.py @@ -597,12 +597,12 @@ def decode_dataset( assert audio.dtype == np.float32, audio.dtype # The trained model is using normalized samples - if audio.max() > 1: - logging.warning( - f"The audio should be normalized to [-1, 1], audio.max : {audio.max()}." - f"Clipping to [-1, 1]." - ) - audio = np.clip(audio, -1, 1) + # - this is to avoid sending [-32k,+32k] signal in... + # - some lhotse AudioTransform classes can make the signal + # be out of range [-1, 1], hence the tolerance 10 + assert ( + np.abs(audio).max() <= 10 + ), "Should be normalized to [-1, 1], 10 for tolerance..." samples = torch.from_numpy(audio).squeeze(0) From b9b56eb879e694684156b6ba441a1c665ff26e19 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 8 Jan 2024 14:28:07 +0800 Subject: [PATCH 12/15] Minor fixes to the VCTK data prep scripts (#1441) * Update prepare.sh --- egs/vctk/TTS/prepare.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/egs/vctk/TTS/prepare.sh b/egs/vctk/TTS/prepare.sh index 87150ad31..152c7b168 100755 --- a/egs/vctk/TTS/prepare.sh +++ b/egs/vctk/TTS/prepare.sh @@ -7,6 +7,7 @@ set -eou pipefail stage=0 stop_stage=100 +use_edinburgh_vctk_url=true dl_dir=$PWD/download @@ -44,7 +45,7 @@ if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then # ln -sfv /path/to/VCTK $dl_dir/VCTK # if [ ! -d $dl_dir/VCTK ]; then - lhotse download vctk $dl_dir + lhotse download vctk --use-edinburgh-vctk-url ${use_edinburgh_vctk_url} $dl_dir fi fi @@ -54,7 +55,7 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then # to $dl_dir/VCTK mkdir -p data/manifests if [ ! 
-e data/manifests/.vctk.done ]; then - lhotse prepare vctk --use-edinburgh-vctk-url true $dl_dir/VCTK data/manifests + lhotse prepare vctk --use-edinburgh-vctk-url ${use_edinburgh_vctk_url} $dl_dir/VCTK data/manifests touch data/manifests/.vctk.done fi fi From 5445ea6df6e250f8f1e9b2df3bb9e54afe104f97 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 8 Jan 2024 15:09:21 +0800 Subject: [PATCH 13/15] Use shuffled LibriSpeech cuts instead (#1450) * use shuffled LibriSpeech cuts instead * leave the old code in comments for reference --- egs/librispeech/ASR/conformer_ctc3/train.py | 15 ++++++++++++--- egs/librispeech/ASR/conformer_mmi/train.py | 16 +++++++++++++--- .../ASR/lstm_transducer_stateless3/train.py | 15 ++++++++++++--- .../ASR/pruned2_knowledge/train.py | 15 ++++++++++++--- .../train.py | 19 ++++++++++++++++--- .../train.py | 11 ++++++++--- egs/librispeech/ASR/zipformer/train.py | 15 ++++++++++++--- egs/librispeech/ASR/zipformer_mmi/train.py | 8 +++++--- 8 files changed, 90 insertions(+), 24 deletions(-) diff --git a/egs/librispeech/ASR/conformer_ctc3/train.py b/egs/librispeech/ASR/conformer_ctc3/train.py index 2cd223945..a2f1125ca 100755 --- a/egs/librispeech/ASR/conformer_ctc3/train.py +++ b/egs/librispeech/ASR/conformer_ctc3/train.py @@ -952,10 +952,19 @@ def run(rank, world_size, args): librispeech = LibriSpeechAsrDataModule(args) - train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts + # strictly speaking, shuffled training cuts should be used instead + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + + # train_cuts = librispeech.train_clean_100_cuts() + # train_cuts += librispeech.train_clean_360_cuts() + # train_cuts += librispeech.train_other_500_cuts() + 
else: + train_cuts = librispeech.train_clean_100_cuts() def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 20 seconds diff --git a/egs/librispeech/ASR/conformer_mmi/train.py b/egs/librispeech/ASR/conformer_mmi/train.py index f9f80632e..fe8c85f61 100755 --- a/egs/librispeech/ASR/conformer_mmi/train.py +++ b/egs/librispeech/ASR/conformer_mmi/train.py @@ -771,10 +771,20 @@ def run(rank, world_size, args): valid_ali = None librispeech = LibriSpeechAsrDataModule(args) - train_cuts = librispeech.train_clean_100_cuts() + if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + # strictly speaking, shuffled training cuts should be used instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + + # train_cuts = librispeech.train_clean_100_cuts() + # train_cuts += librispeech.train_clean_360_cuts() + # train_cuts += librispeech.train_other_500_cuts() + else: + train_cuts = librispeech.train_clean_100_cuts() def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 20 seconds diff --git a/egs/librispeech/ASR/lstm_transducer_stateless3/train.py b/egs/librispeech/ASR/lstm_transducer_stateless3/train.py index 6ef4c9860..2c1cef3a3 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless3/train.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless3/train.py @@ -989,10 +989,19 @@ def run(rank, world_size, args): librispeech = LibriSpeechAsrDataModule(args) - train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + 
# strictly speaking, shuffled training cuts should be used instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + + # train_cuts = librispeech.train_clean_100_cuts() + # train_cuts += librispeech.train_clean_360_cuts() + # train_cuts += librispeech.train_other_500_cuts() + else: + train_cuts = librispeech.train_clean_100_cuts() def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 20 seconds diff --git a/egs/librispeech/ASR/pruned2_knowledge/train.py b/egs/librispeech/ASR/pruned2_knowledge/train.py index a4899f7bd..931341cc4 100755 --- a/egs/librispeech/ASR/pruned2_knowledge/train.py +++ b/egs/librispeech/ASR/pruned2_knowledge/train.py @@ -817,10 +817,19 @@ def run(rank, world_size, args): librispeech = LibriSpeechAsrDataModule(args) - train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + # strictly speaking, shuffled training cuts should be used instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + + # train_cuts = librispeech.train_clean_100_cuts() + # train_cuts += librispeech.train_clean_360_cuts() + # train_cuts += librispeech.train_other_500_cuts() + else: + train_cuts = librispeech.train_clean_100_cuts() def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 20 seconds diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py index 2d915ff87..e1bdce49d 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/train.py @@ 
-1038,13 +1038,26 @@ def run(rank, world_size, args): librispeech = LibriSpeechAsrDataModule(args) + assert not ( + params.mini_libri and params.full_libri + ), f"Cannot set both mini-libri and full-libri flags to True, now mini-libri {params.mini_libri} and full-libri {params.full_libri}" + if params.mini_libri: train_cuts = librispeech.train_clean_5_cuts() else: - train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + # strictly speaking, shuffled training cuts should be used instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + + # train_cuts = librispeech.train_clean_100_cuts() + # train_cuts += librispeech.train_clean_360_cuts() + # train_cuts += librispeech.train_other_500_cuts() + else: + train_cuts = librispeech.train_clean_100_cuts() def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 20 seconds diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py index 565dc7a16..1642ef4b7 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming_multi/train.py @@ -1150,10 +1150,15 @@ def run(rank, world_size, args): librispeech = LibriSpeech(manifest_dir=args.manifest_dir) - train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + # strictly speaking, shuffled training cuts should be used 
instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + else: + train_cuts = librispeech.train_clean_100_cuts() train_cuts = filter_short_and_long_utterances(train_cuts, sp) diff --git a/egs/librispeech/ASR/zipformer/train.py b/egs/librispeech/ASR/zipformer/train.py index 7009f3346..3ccf7d2f1 100755 --- a/egs/librispeech/ASR/zipformer/train.py +++ b/egs/librispeech/ASR/zipformer/train.py @@ -1174,10 +1174,19 @@ def run(rank, world_size, args): librispeech = LibriSpeechAsrDataModule(args) - train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - train_cuts += librispeech.train_clean_360_cuts() - train_cuts += librispeech.train_other_500_cuts() + train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + # strictly speaking, shuffled training cuts should be used instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets + + # train_cuts = librispeech.train_clean_100_cuts() + # train_cuts += librispeech.train_clean_360_cuts() + # train_cuts += librispeech.train_other_500_cuts() + else: + train_cuts = librispeech.train_clean_100_cuts() def remove_short_and_long_utt(c: Cut): # Keep only utterances with duration between 1 second and 20 seconds diff --git a/egs/librispeech/ASR/zipformer_mmi/train.py b/egs/librispeech/ASR/zipformer_mmi/train.py index 4b50acdde..dd8949523 100755 --- a/egs/librispeech/ASR/zipformer_mmi/train.py +++ b/egs/librispeech/ASR/zipformer_mmi/train.py @@ -990,11 +990,13 @@ def run(rank, world_size, args): librispeech = LibriSpeechAsrDataModule(args) - # train_cuts = librispeech.train_clean_100_cuts() if params.full_libri: - # train_cuts += librispeech.train_clean_360_cuts() - # train_cuts += librispeech.train_other_500_cuts() train_cuts = librispeech.train_all_shuf_cuts() + + # previously we used the following code to load all training cuts, + # 
strictly speaking, shuffled training cuts should be used instead, + # but we leave the code here to demonstrate that there is an option + # like this to combine multiple cutsets else: train_cuts = librispeech.train_clean_100_cuts() From e2fcb42f5f176d9e39eb38506ab99d0a3adaf202 Mon Sep 17 00:00:00 2001 From: Xiaoyu Yang <45973641+marcoyang1998@users.noreply.github.com> Date: Tue, 9 Jan 2024 15:41:37 +0800 Subject: [PATCH 14/15] fix typo (#1455) --- .../RNN-LM/librispeech/lm-training.rst | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/docs/source/recipes/RNN-LM/librispeech/lm-training.rst b/docs/source/recipes/RNN-LM/librispeech/lm-training.rst index 46499a374..e0c90f2a6 100644 --- a/docs/source/recipes/RNN-LM/librispeech/lm-training.rst +++ b/docs/source/recipes/RNN-LM/librispeech/lm-training.rst @@ -4,7 +4,7 @@ Train an RNN language model ====================================== If you have enough text data, you can train a neural network language model (NNLM) to improve -the WER of your E2E ASR system. This tutorial shows you how to train an RNNLM from +the WER of your E2E ASR system. This tutorial shows you how to train an RNNLM from scratch. .. HINT:: @@ -15,23 +15,23 @@ scratch. .. note:: This tutorial is based on the LibriSpeech recipe. Please check it out for the necessary - python scripts for this tutorial. We use the LibriSpeech LM-corpus as the LM training set + python scripts for this tutorial. We use the LibriSpeech LM-corpus as the LM training set for illustration purpose. You can also collect your own data. The data format is quite simple: each line should contain a complete sentence, and words should be separated by space. -First, let's download the training data for the RNNLM. This can be done via the +First, let's download the training data for the RNNLM. This can be done via the following command: .. 
code-block:: bash - $ wget https://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz + $ wget https://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz $ gzip -d librispeech-lm-norm.txt.gz As we are training a BPE-level RNNLM, we need to tokenize the training text, which requires a BPE tokenizer. This can be achieved by executing the following command: .. code-block:: bash - + $ # if you don't have the BPE $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15 $ cd icefall-asr-librispeech-zipformer-2023-05-15/data/lang_bpe_500 @@ -56,11 +56,11 @@ sentence length. --out-statistics data/lang_bpe_500/lm_data_stats.txt -The aforementioned steps can be repeated to create a a validation set for you RNNLM. Let's say -you have a validation set in ``valid.txt``, you can just set ``--lm-data valid.txt`` +The aforementioned steps can be repeated to create a a validation set for you RNNLM. Let's say +you have a validation set in ``valid.txt``, you can just set ``--lm-data valid.txt`` and ``--lm-archive data/lang_bpe_500/lm-data-valid.pt`` when calling ``./local/prepare_lm_training_data.py``. -After completing the previous steps, the training and testing sets for training RNNLM are ready. +After completing the previous steps, the training and testing sets for training RNNLM are ready. The next step is to train the RNNLM model. The training command is as follows: .. code-block:: bash @@ -77,7 +77,7 @@ The next step is to train the RNNLM model. The training command is as follows: --use-fp16 0 \ --tie-weights 1 \ --embedding-dim 2048 \ - --hidden_dim 2048 \ + --hidden-dim 2048 \ --num-layers 3 \ --batch-size 300 \ --lm-data rnn_lm/data/lang_bpe_500/sorted_lm_data.pt \ @@ -93,12 +93,3 @@ The next step is to train the RNNLM model. The training command is as follows: .. note:: The training of RNNLM can take a long time (usually a couple of days). 
- - - - - - - - - From 398401ed277d4f895f624a95919c57edbbde4cba Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 14 Jan 2024 14:38:41 +0800 Subject: [PATCH 15/15] Update kaldifeat installation doc (#1460) --- docs/source/for-dummies/environment-setup.rst | 4 ++-- docs/source/for-dummies/model-export.rst | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/for-dummies/environment-setup.rst b/docs/source/for-dummies/environment-setup.rst index 0cb8ecc1d..a68e9d3ed 100644 --- a/docs/source/for-dummies/environment-setup.rst +++ b/docs/source/for-dummies/environment-setup.rst @@ -66,13 +66,13 @@ to install dependencies of `icefall`_: pip install torch==2.0.0+cpu torchaudio==2.0.0+cpu -f https://download.pytorch.org/whl/torch_stable.html - # If you are using macOS or Windows, please use the following command to install torch and torchaudio + # If you are using macOS, please use the following command to install torch and torchaudio # pip install torch==2.0.0 torchaudio==2.0.0 -f https://download.pytorch.org/whl/torch_stable.html # Now install k2 # Please refer to https://k2-fsa.github.io/k2/installation/from_wheels.html#linux-cpu-example - pip install k2==1.24.3.dev20230726+cpu.torch2.0.0 -f https://k2-fsa.github.io/k2/cpu.html + pip install k2==1.24.4.dev20231220+cpu.torch2.0.0 -f https://k2-fsa.github.io/k2/cpu.html # Install the latest version of lhotse diff --git a/docs/source/for-dummies/model-export.rst b/docs/source/for-dummies/model-export.rst index 079ebc712..352a0dc90 100644 --- a/docs/source/for-dummies/model-export.rst +++ b/docs/source/for-dummies/model-export.rst @@ -85,7 +85,7 @@ We can also use it to decode files with the following command: # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html # for how to install kaldifeat - pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + pip install 
kaldifeat==1.25.3.dev20231221+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html ./tdnn/pretrained.py \ --checkpoint ./tdnn/exp/pretrained.pt \ @@ -162,7 +162,7 @@ To use ``tdnn/exp/cpu_jit.pt`` with `icefall`_ to decode files, we can use: # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html # for how to install kaldifeat - pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + pip install kaldifeat==1.25.3.dev20231221+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html ./tdnn/jit_pretrained.py \ @@ -249,7 +249,7 @@ To use the generated ONNX model files for decoding with `onnxruntime`_, we can u # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html # for how to install kaldifeat - pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + pip install kaldifeat==1.25.3.dev20231221+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html ./tdnn/onnx_pretrained.py \ --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \