From 06b356a610ec0bcef8982f011ba2a46cd8ca29b5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 18 Feb 2024 12:05:38 +0800 Subject: [PATCH 1/8] Update cpu docker images to support torch 2.2.0 (#1499) --- .github/scripts/docker/Dockerfile | 1 + .../scripts/docker/generate_build_matrix.py | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index f6a088af1..ee0099911 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -11,6 +11,7 @@ ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}" RUN apt-get update -y && \ apt-get install -qq -y \ + cmake \ ffmpeg \ git \ git-lfs \ diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index bdde97647..f0690f8bf 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -6,8 +6,8 @@ import json def version_gt(a, b): - a_major, a_minor = a.split(".")[:2] - b_major, b_minor = b.split(".")[:2] + a_major, a_minor = list(map(int, a.split(".")))[:2] + b_major, b_minor = list(map(int, b.split(".")))[:2] if a_major > b_major: return True @@ -18,8 +18,8 @@ def version_gt(a, b): def version_ge(a, b): - a_major, a_minor = a.split(".")[:2] - b_major, b_minor = b.split(".")[:2] + a_major, a_minor = list(map(int, a.split(".")))[:2] + b_major, b_minor = list(map(int, b.split(".")))[:2] if a_major > b_major: return True @@ -43,11 +43,12 @@ def get_torchaudio_version(torch_version): def get_matrix(): - k2_version = "1.24.4.dev20231220" - kaldifeat_version = "1.25.3.dev20231221" - version = "1.2" - python_version = ["3.8", "3.9", "3.10", "3.11"] + k2_version = "1.24.4.dev20240211" + kaldifeat_version = "1.25.4.dev20240210" + version = "1.3" + python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] + torch_version += ["2.2.0"] matrix = [] for p in python_version: @@ -57,6 +58,10 @@ def get_matrix(): if version_gt(p, "3.10") and not version_gt(t, "2.0"): continue + # only torch>=2.2.0 supports python 3.12 + if version_gt(p, "3.11") and not version_gt(t, "2.1"): + continue + matrix.append( { "k2-version": k2_version, From 17688476e5cbdba92c682d3a75e3941b647573a7 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 18 Feb 2024 14:56:04 +0800 Subject: [PATCH 2/8] Provider docker images for torch 2.2.0 (#1501) --- .github/workflows/build-docker-image.yml | 2 +- .github/workflows/run-docker-image.yml | 9 ++- docker/torch1.12.1-cuda11.3.dockerfile | 4 +- docker/torch1.13.0-cuda11.6.dockerfile | 4 +- docker/torch1.9.0-cuda10.2.dockerfile | 4 +- docker/torch2.0.0-cuda11.7.dockerfile | 4 +- docker/torch2.1.0-cuda11.8.dockerfile | 4 +- docker/torch2.1.0-cuda12.1.dockerfile | 4 +- docker/torch2.2.0-cuda11.8.dockerfile | 70 ++++++++++++++++++++++++ docker/torch2.2.0-cuda12.1.dockerfile | 70 ++++++++++++++++++++++++ docs/source/docker/intro.rst | 2 + 11 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 docker/torch2.2.0-cuda11.8.dockerfile create mode 100644 docker/torch2.2.0-cuda12.1.dockerfile diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index e5d96dcdf..d5081f7d8 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - image: ["torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] + image: ["torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] steps: # refer to https://github.com/actions/checkout diff --git a/.github/workflows/run-docker-image.yml b/.github/workflows/run-docker-image.yml index d048923b6..65ba2cd64 100644 --- a/.github/workflows/run-docker-image.yml +++ b/.github/workflows/run-docker-image.yml @@ -14,13 +14,20 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - image: ["torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] + image: ["torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] steps: # refer to https://github.com/actions/checkout - uses: actions/checkout@v2 with: fetch-depth: 0 + - name: Free space + shell: bash + run: | + df -h + rm -rf /opt/hostedtoolcache + df -h + - name: Run the build process with Docker uses: addnab/docker-run-action@v3 with: diff --git a/docker/torch1.12.1-cuda11.3.dockerfile b/docker/torch1.12.1-cuda11.3.dockerfile index deb5715cc..cb885e59e 100644 --- a/docker/torch1.12.1-cuda11.3.dockerfile +++ b/docker/torch1.12.1-cuda11.3.dockerfile @@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive # python 3.7 -ARG K2_VERSION="1.24.4.dev20230725+cuda11.3.torch1.12.1" -ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.3.torch1.12.1" +ARG K2_VERSION="1.24.4.dev20240211+cuda11.3.torch1.12.1" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.3.torch1.12.1" ARG TORCHAUDIO_VERSION="0.12.1+cu113" LABEL authors="Fangjun Kuang " diff --git a/docker/torch1.13.0-cuda11.6.dockerfile b/docker/torch1.13.0-cuda11.6.dockerfile index afc6c1b84..e238d87aa 100644 --- a/docker/torch1.13.0-cuda11.6.dockerfile +++ b/docker/torch1.13.0-cuda11.6.dockerfile @@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive # python 3.9 -ARG K2_VERSION="1.24.4.dev20231021+cuda11.6.torch1.13.0" -ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.6.torch1.13.0" +ARG K2_VERSION="1.24.4.dev20240211+cuda11.6.torch1.13.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.6.torch1.13.0" ARG TORCHAUDIO_VERSION="0.13.0+cu116" LABEL authors="Fangjun Kuang " diff --git a/docker/torch1.9.0-cuda10.2.dockerfile b/docker/torch1.9.0-cuda10.2.dockerfile index 9ff225b54..26d45cafc 100644 --- a/docker/torch1.9.0-cuda10.2.dockerfile +++ b/docker/torch1.9.0-cuda10.2.dockerfile @@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive # python 3.7 -ARG K2_VERSION="1.24.3.dev20230726+cuda10.2.torch1.9.0" -ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda10.2.torch1.9.0" +ARG K2_VERSION="1.24.4.dev20240211+cuda10.2.torch1.9.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda10.2.torch1.9.0" ARG TORCHAUDIO_VERSION="0.9.0" LABEL authors="Fangjun Kuang " diff --git a/docker/torch2.0.0-cuda11.7.dockerfile b/docker/torch2.0.0-cuda11.7.dockerfile index db8076560..02906e53b 100644 --- a/docker/torch2.0.0-cuda11.7.dockerfile +++ b/docker/torch2.0.0-cuda11.7.dockerfile @@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive # python 3.10 -ARG K2_VERSION="1.24.4.dev20231021+cuda11.7.torch2.0.0" -ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.7.torch2.0.0" +ARG K2_VERSION="1.24.4.dev20240211+cuda11.7.torch2.0.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.7.torch2.0.0" ARG TORCHAUDIO_VERSION="2.0.0+cu117" LABEL authors="Fangjun Kuang " diff --git a/docker/torch2.1.0-cuda11.8.dockerfile b/docker/torch2.1.0-cuda11.8.dockerfile index b006b0d96..c87305922 100644 --- a/docker/torch2.1.0-cuda11.8.dockerfile +++ b/docker/torch2.1.0-cuda11.8.dockerfile @@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive # python 3.10 -ARG K2_VERSION="1.24.4.dev20231021+cuda11.8.torch2.1.0" -ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.8.torch2.1.0" +ARG K2_VERSION="1.24.4.dev20240211+cuda11.8.torch2.1.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.8.torch2.1.0" ARG TORCHAUDIO_VERSION="2.1.0+cu118" LABEL authors="Fangjun Kuang " diff --git a/docker/torch2.1.0-cuda12.1.dockerfile b/docker/torch2.1.0-cuda12.1.dockerfile index 1b078dc22..f4c297678 100644 --- a/docker/torch2.1.0-cuda12.1.dockerfile +++ b/docker/torch2.1.0-cuda12.1.dockerfile @@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive # python 3.10 -ARG K2_VERSION="1.24.4.dev20231021+cuda12.1.torch2.1.0" -ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda12.1.torch2.1.0" +ARG K2_VERSION="1.24.4.dev20240211+cuda12.1.torch2.1.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda12.1.torch2.1.0" ARG TORCHAUDIO_VERSION="2.1.0+cu121" LABEL authors="Fangjun Kuang " diff --git a/docker/torch2.2.0-cuda11.8.dockerfile b/docker/torch2.2.0-cuda11.8.dockerfile new file mode 100644 index 000000000..c59661c27 --- /dev/null +++ b/docker/torch2.2.0-cuda11.8.dockerfile @@ -0,0 +1,70 @@ +FROM pytorch/pytorch:2.2.0-cuda11.8-cudnn8-devel + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +# python 3.10 +ARG K2_VERSION="1.24.4.dev20240211+cuda11.8.torch2.2.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.8.torch2.2.0" +ARG TORCHAUDIO_VERSION="2.2.0+cu118" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docker/torch2.2.0-cuda12.1.dockerfile b/docker/torch2.2.0-cuda12.1.dockerfile new file mode 100644 index 000000000..2c484efd5 --- /dev/null +++ b/docker/torch2.2.0-cuda12.1.dockerfile @@ -0,0 +1,70 @@ +FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-devel + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +# python 3.10 +ARG K2_VERSION="1.24.4.dev20240211+cuda12.1.torch2.2.0" +ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda12.1.torch2.2.0" +ARG TORCHAUDIO_VERSION="2.2.0+cu121" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docs/source/docker/intro.rst b/docs/source/docker/intro.rst index cbd300d9b..149970eff 100644 --- a/docs/source/docker/intro.rst +++ b/docs/source/docker/intro.rst @@ -34,6 +34,8 @@ which will give you something like below: .. code-block:: bash + "torch2.2.0-cuda12.1" + "torch2.2.0-cuda11.8" "torch2.1.0-cuda12.1" "torch2.1.0-cuda11.8" "torch2.0.0-cuda11.7" From 7eb360d0d5f3eb03292d3ff4596a8d50c9765888 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 18 Feb 2024 20:32:40 +0800 Subject: [PATCH 3/8] Fix cpu docker images for torch 2.2.0 (#1502) --- .github/scripts/docker/generate_build_matrix.py | 4 ++-- .github/workflows/yesno.yml | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index f0690f8bf..425afac2b 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -43,8 +43,8 @@ def get_torchaudio_version(torch_version): def get_matrix(): - k2_version = "1.24.4.dev20240211" - kaldifeat_version = "1.25.4.dev20240210" + k2_version = "1.24.4.dev20240218" + kaldifeat_version = "1.25.4.dev20240218" version = "1.3" python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] diff --git a/.github/workflows/yesno.yml b/.github/workflows/yesno.yml index 182300dfa..de822b33f 100644 --- a/.github/workflows/yesno.yml +++ b/.github/workflows/yesno.yml @@ -59,4 +59,7 @@ jobs: cd /icefall git config --global --add safe.directory /icefall + python3 -m torch.utils.collect_env + python3 -m k2.version + .github/scripts/yesno/ASR/run.sh From db4d66c0e39a06464f5c316c727ed76babeb10eb Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 19 Feb 2024 16:13:09 +0800 Subject: [PATCH 4/8] Fixed softlink for `ljspeech` recipe (#1503) --- egs/ljspeech/TTS/shared | 1 + egs/ljspeech/TTS/shared/parse_options.sh | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 120000 egs/ljspeech/TTS/shared delete mode 120000 egs/ljspeech/TTS/shared/parse_options.sh diff --git a/egs/ljspeech/TTS/shared b/egs/ljspeech/TTS/shared new file mode 120000 index 000000000..4c5e91438 --- /dev/null +++ b/egs/ljspeech/TTS/shared @@ -0,0 +1 @@ +../../../icefall/shared/ \ No newline at end of file diff --git a/egs/ljspeech/TTS/shared/parse_options.sh b/egs/ljspeech/TTS/shared/parse_options.sh deleted file mode 120000 index e4665e7de..000000000 --- a/egs/ljspeech/TTS/shared/parse_options.sh +++ /dev/null @@ -1 +0,0 @@ -../../../librispeech/ASR/shared/parse_options.sh \ No newline at end of file From b3e2044068001a24bc9293f5b7063377173631d3 Mon Sep 17 00:00:00 2001 From: Zengwei Yao Date: Mon, 19 Feb 2024 19:33:32 +0800 Subject: [PATCH 5/8] minor fix of vits/tokenizer.py (#1504) * minor fix of vits/tokenizer.py --- egs/ljspeech/TTS/vits/tokenizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/egs/ljspeech/TTS/vits/tokenizer.py b/egs/ljspeech/TTS/vits/tokenizer.py index 70f1240b4..b0afc6a04 100644 --- a/egs/ljspeech/TTS/vits/tokenizer.py +++ b/egs/ljspeech/TTS/vits/tokenizer.py @@ -74,7 +74,7 @@ class Tokenizer(object): if intersperse_blank: token_ids = intersperse(token_ids, self.blank_id) - token_ids_list.append(token_ids) + token_ids_list.append(token_ids) return token_ids_list @@ -103,6 +103,7 @@ class Tokenizer(object): if intersperse_blank: token_ids = intersperse(token_ids, self.blank_id) - token_ids_list.append(token_ids) + + token_ids_list.append(token_ids) return token_ids_list From e59fa38e86bd05241daa4217d66eaa0e36825547 Mon Sep 17 00:00:00 2001 From: Karel Vesely Date: Tue, 20 Feb 2024 03:40:15 +0100 Subject: [PATCH 6/8] docs: minor fixes of LM rescoring texts (#1498) --- .../decoding-with-langugage-models/LODR.rst | 6 ++--- .../shallow-fusion.rst | 24 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/source/decoding-with-langugage-models/LODR.rst b/docs/source/decoding-with-langugage-models/LODR.rst index b6b6e8cbb..d4b6f7065 100644 --- a/docs/source/decoding-with-langugage-models/LODR.rst +++ b/docs/source/decoding-with-langugage-models/LODR.rst @@ -30,7 +30,7 @@ of langugae model integration. First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) is first proposed `here `_ to address the language information mismatch between the training corpus (source domain) and the testing corpus (target domain). Assuming that the source domain and the test domain -are acoustically similar, DR derives the following formular for decoding with Bayes' theorem: +are acoustically similar, DR derives the following formula for decoding with Bayes' theorem: .. math:: @@ -41,7 +41,7 @@ are acoustically similar, DR derives the following formular for decoding with Ba where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively. -Here, the source domain LM is trained on the training corpus. The only difference in the above formular compared to +Here, the source domain LM is trained on the training corpus. The only difference in the above formula compared to shallow fusion is the subtraction of the source domain LM. Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is @@ -58,7 +58,7 @@ during decoding for transducer model: In LODR, an additional bi-gram LM estimated on the source domain (e.g training corpus) is required. Compared to DR, the only difference lies in the choice of source domain LM. According to the original `paper `_, -LODR achieves similar performance compared DR in both intra-domain and cross-domain settings. +LODR achieves similar performance compared to DR in both intra-domain and cross-domain settings. As a bi-gram is much faster to evaluate, LODR is usually much faster. Now, we will show you how to use LODR in ``icefall``. diff --git a/docs/source/decoding-with-langugage-models/shallow-fusion.rst b/docs/source/decoding-with-langugage-models/shallow-fusion.rst index 684fefeb4..8b2586730 100644 --- a/docs/source/decoding-with-langugage-models/shallow-fusion.rst +++ b/docs/source/decoding-with-langugage-models/shallow-fusion.rst @@ -9,9 +9,9 @@ to improve the word-error-rate of a transducer model. .. note:: - This tutorial is based on the recipe + This tutorial is based on the recipe `pruned_transducer_stateless7_streaming `_, - which is a streaming transducer model trained on `LibriSpeech`_. + which is a streaming transducer model trained on `LibriSpeech`_. However, you can easily apply shallow fusion to other recipes. If you encounter any problems, please open an issue here `icefall `_. @@ -69,11 +69,11 @@ Training a language model usually takes a long time, we can download a pre-train .. code-block:: bash $ # download the external LM - $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm + $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm $ # create a symbolic link so that the checkpoint can be loaded $ pushd icefall-librispeech-rnn-lm/exp $ git lfs pull --include "pretrained.pt" - $ ln -s pretrained.pt epoch-99.pt + $ ln -s pretrained.pt epoch-99.pt $ popd .. note:: @@ -85,7 +85,7 @@ Training a language model usually takes a long time, we can download a pre-train To use shallow fusion for decoding, we can execute the following command: .. code-block:: bash - + $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp $ lm_dir=./icefall-librispeech-rnn-lm/exp $ lm_scale=0.29 @@ -133,16 +133,16 @@ The decoding result obtained with the above command are shown below. $ For test-other, WER of different settings are: $ beam_size_4 7.08 best for test-other -The improvement of shallow fusion is very obvious! The relative WER reduction on test-other is around 10.5%. +The improvement of shallow fusion is very obvious! The relative WER reduction on test-other is around 10.5%. A few parameters can be tuned to further boost the performance of shallow fusion: -- ``--lm-scale`` +- ``--lm-scale`` - Controls the scale of the LM. If too small, the external language model may not be fully utilized; if too large, - the LM score may dominant during decoding, leading to bad WER. A typical value of this is around 0.3. + Controls the scale of the LM. If too small, the external language model may not be fully utilized; if too large, + the LM score might be dominant during decoding, leading to bad WER. A typical value of this is around 0.3. + +- ``--beam-size`` -- ``--beam-size`` - The number of active paths in the search beam. It controls the trade-off between decoding efficiency and accuracy. Here, we also show how `--beam-size` effect the WER and decoding time: @@ -176,4 +176,4 @@ As we see, a larger beam size during shallow fusion improves the WER, but is als - + From 027302c902ce9ab44754d42a56cf1eba9a075be9 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Tue, 20 Feb 2024 14:38:51 +0800 Subject: [PATCH 7/8] minor fix for param. names (#1495) --- icefall/lm_wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/icefall/lm_wrapper.py b/icefall/lm_wrapper.py index 5e2783a47..26839c61c 100644 --- a/icefall/lm_wrapper.py +++ b/icefall/lm_wrapper.py @@ -159,7 +159,7 @@ class LmScorer(torch.nn.Module): """ if lm_type == "rnn": model = RnnLmModel( - vocab_size=params.vocab_size, + vocab_size=params.lm_vocab_size, embedding_dim=params.rnn_lm_embedding_dim, hidden_dim=params.rnn_lm_hidden_dim, num_layers=params.rnn_lm_num_layers, @@ -183,7 +183,7 @@ class LmScorer(torch.nn.Module): elif lm_type == "transformer": model = TransformerLM( - vocab_size=params.vocab_size, + vocab_size=params.lm_vocab_size, d_model=params.transformer_lm_encoder_dim, embedding_dim=params.transformer_lm_embedding_dim, dim_feedforward=params.transformer_lm_dim_feedforward, From c19b4147789f306efed754dd0ed8f651017a7484 Mon Sep 17 00:00:00 2001 From: Wei Kang Date: Wed, 21 Feb 2024 08:04:16 +0800 Subject: [PATCH 8/8] Update docker (adding pypinyin (#1513) Update docker (adding pypinyin) --- .github/scripts/docker/Dockerfile | 1 + .github/scripts/docker/generate_build_matrix.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/scripts/docker/Dockerfile b/.github/scripts/docker/Dockerfile index ee0099911..4adb7ab5c 100644 --- a/.github/scripts/docker/Dockerfile +++ b/.github/scripts/docker/Dockerfile @@ -51,6 +51,7 @@ RUN pip install --no-cache-dir \ onnxruntime \ pytest \ sentencepiece>=0.1.96 \ + pypinyin==0.50.0 \ six \ tensorboard \ typeguard diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index 425afac2b..ed01bd740 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -45,7 +45,7 @@ def get_torchaudio_version(torch_version): def get_matrix(): k2_version = "1.24.4.dev20240218" kaldifeat_version = "1.25.4.dev20240218" - version = "1.3" + version = "1.4" python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"] torch_version += ["2.2.0"]