mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-09-03 06:04:18 +00:00
Merge with master
This commit is contained in:
commit
5cc68f614b
1
.github/scripts/docker/Dockerfile
vendored
1
.github/scripts/docker/Dockerfile
vendored
@ -11,6 +11,7 @@ ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}"
|
||||
|
||||
RUN apt-get update -y && \
|
||||
apt-get install -qq -y \
|
||||
cmake \
|
||||
ffmpeg \
|
||||
git \
|
||||
git-lfs \
|
||||
|
21
.github/scripts/docker/generate_build_matrix.py
vendored
21
.github/scripts/docker/generate_build_matrix.py
vendored
@ -6,8 +6,8 @@ import json
|
||||
|
||||
|
||||
def version_gt(a, b):
|
||||
a_major, a_minor = a.split(".")[:2]
|
||||
b_major, b_minor = b.split(".")[:2]
|
||||
a_major, a_minor = list(map(int, a.split(".")))[:2]
|
||||
b_major, b_minor = list(map(int, b.split(".")))[:2]
|
||||
if a_major > b_major:
|
||||
return True
|
||||
|
||||
@ -18,8 +18,8 @@ def version_gt(a, b):
|
||||
|
||||
|
||||
def version_ge(a, b):
|
||||
a_major, a_minor = a.split(".")[:2]
|
||||
b_major, b_minor = b.split(".")[:2]
|
||||
a_major, a_minor = list(map(int, a.split(".")))[:2]
|
||||
b_major, b_minor = list(map(int, b.split(".")))[:2]
|
||||
if a_major > b_major:
|
||||
return True
|
||||
|
||||
@ -43,11 +43,12 @@ def get_torchaudio_version(torch_version):
|
||||
|
||||
|
||||
def get_matrix():
|
||||
k2_version = "1.24.4.dev20231220"
|
||||
kaldifeat_version = "1.25.3.dev20231221"
|
||||
version = "1.3"
|
||||
python_version = ["3.8", "3.9", "3.10", "3.11"]
|
||||
k2_version = "1.24.4.dev20240218"
|
||||
kaldifeat_version = "1.25.4.dev20240218"
|
||||
version = "1.4"
|
||||
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
torch_version = ["1.13.0", "1.13.1", "2.0.0", "2.0.1", "2.1.0", "2.1.1", "2.1.2"]
|
||||
torch_version += ["2.2.0"]
|
||||
|
||||
matrix = []
|
||||
for p in python_version:
|
||||
@ -57,6 +58,10 @@ def get_matrix():
|
||||
if version_gt(p, "3.10") and not version_gt(t, "2.0"):
|
||||
continue
|
||||
|
||||
# only torch>=2.2.0 supports python 3.12
|
||||
if version_gt(p, "3.11") and not version_gt(t, "2.1"):
|
||||
continue
|
||||
|
||||
matrix.append(
|
||||
{
|
||||
"k2-version": k2_version,
|
||||
|
2
.github/workflows/build-docker-image.yml
vendored
2
.github/workflows/build-docker-image.yml
vendored
@ -16,7 +16,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
image: ["torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
||||
image: ["torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
||||
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
|
9
.github/workflows/run-docker-image.yml
vendored
9
.github/workflows/run-docker-image.yml
vendored
@ -14,13 +14,20 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
image: ["torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
||||
image: ["torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
||||
steps:
|
||||
# refer to https://github.com/actions/checkout
|
||||
- uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Free space
|
||||
shell: bash
|
||||
run: |
|
||||
df -h
|
||||
rm -rf /opt/hostedtoolcache
|
||||
df -h
|
||||
|
||||
- name: Run the build process with Docker
|
||||
uses: addnab/docker-run-action@v3
|
||||
with:
|
||||
|
3
.github/workflows/yesno.yml
vendored
3
.github/workflows/yesno.yml
vendored
@ -59,4 +59,7 @@ jobs:
|
||||
cd /icefall
|
||||
git config --global --add safe.directory /icefall
|
||||
|
||||
python3 -m torch.utils.collect_env
|
||||
python3 -m k2.version
|
||||
|
||||
.github/scripts/yesno/ASR/run.sh
|
||||
|
@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.7
|
||||
ARG K2_VERSION="1.24.4.dev20230725+cuda11.3.torch1.12.1"
|
||||
ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.3.torch1.12.1"
|
||||
ARG K2_VERSION="1.24.4.dev20240211+cuda11.3.torch1.12.1"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.3.torch1.12.1"
|
||||
ARG TORCHAUDIO_VERSION="0.12.1+cu113"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
|
@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.9
|
||||
ARG K2_VERSION="1.24.4.dev20231021+cuda11.6.torch1.13.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.6.torch1.13.0"
|
||||
ARG K2_VERSION="1.24.4.dev20240211+cuda11.6.torch1.13.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.6.torch1.13.0"
|
||||
ARG TORCHAUDIO_VERSION="0.13.0+cu116"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
|
@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.7
|
||||
ARG K2_VERSION="1.24.3.dev20230726+cuda10.2.torch1.9.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda10.2.torch1.9.0"
|
||||
ARG K2_VERSION="1.24.4.dev20240211+cuda10.2.torch1.9.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda10.2.torch1.9.0"
|
||||
ARG TORCHAUDIO_VERSION="0.9.0"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
|
@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.10
|
||||
ARG K2_VERSION="1.24.4.dev20231021+cuda11.7.torch2.0.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.7.torch2.0.0"
|
||||
ARG K2_VERSION="1.24.4.dev20240211+cuda11.7.torch2.0.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.7.torch2.0.0"
|
||||
ARG TORCHAUDIO_VERSION="2.0.0+cu117"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
|
@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.10
|
||||
ARG K2_VERSION="1.24.4.dev20231021+cuda11.8.torch2.1.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda11.8.torch2.1.0"
|
||||
ARG K2_VERSION="1.24.4.dev20240211+cuda11.8.torch2.1.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.8.torch2.1.0"
|
||||
ARG TORCHAUDIO_VERSION="2.1.0+cu118"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
|
@ -5,8 +5,8 @@ ENV LC_ALL C.UTF-8
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
# python 3.10
|
||||
ARG K2_VERSION="1.24.4.dev20231021+cuda12.1.torch2.1.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.1.dev20231022+cuda12.1.torch2.1.0"
|
||||
ARG K2_VERSION="1.24.4.dev20240211+cuda12.1.torch2.1.0"
|
||||
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda12.1.torch2.1.0"
|
||||
ARG TORCHAUDIO_VERSION="2.1.0+cu121"
|
||||
|
||||
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
||||
|
70
docker/torch2.2.0-cuda11.8.dockerfile
Normal file
70
docker/torch2.2.0-cuda11.8.dockerfile
Normal file
@ -0,0 +1,70 @@
|
||||
# icefall development image on top of the official PyTorch 2.2.0 /
# CUDA 11.8 / cuDNN 8 "devel" base image.
FROM pytorch/pytorch:2.2.0-cuda11.8-cudnn8-devel

ENV LC_ALL=C.UTF-8

# ARG (not ENV): only affects the build, so apt does not prompt during install.
ARG DEBIAN_FRONTEND=noninteractive

# python 3.10
# Pinned wheel versions. The local-version suffixes (+cuda11.8.torch2.2.0,
# +cu118) mirror the CUDA/torch versions named in the base image tag above.
ARG K2_VERSION="1.24.4.dev20240211+cuda11.8.torch2.2.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda11.8.torch2.2.0"
ARG TORCHAUDIO_VERSION="2.2.0+cu118"

LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"

# System build tools and audio utilities. The apt lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        vim \
        libssl-dev \
        autoconf \
        automake \
        bzip2 \
        ca-certificates \
        ffmpeg \
        g++ \
        gfortran \
        git \
        libtool \
        make \
        patch \
        sox \
        subversion \
        unzip \
        valgrind \
        wget \
        zlib1g-dev \
        && rm -rf /var/lib/apt/lists/*

# Install dependencies
#
# NOTE: "sentencepiece>=0.1.96" must stay quoted. This RUN uses the shell
# form, and an unquoted `>` is parsed by /bin/sh as output redirection: pip
# would install an unconstrained sentencepiece and write its output to a file
# literally named "=0.1.96".
#
# lhotse is installed from the default branch of its git repository (no pin).
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      multi_quantization \
      numpy \
      pytest \
      graphviz

# Fetch icefall itself (default branch, unpinned) and install its requirements.
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
    cd /workspace/icefall && \
    pip install --no-cache-dir -r requirements.txt

# Make the cloned checkout importable without installing it as a package.
ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH

WORKDIR /workspace/icefall
|
70
docker/torch2.2.0-cuda12.1.dockerfile
Normal file
70
docker/torch2.2.0-cuda12.1.dockerfile
Normal file
@ -0,0 +1,70 @@
|
||||
# icefall development image on top of the official PyTorch 2.2.0 /
# CUDA 12.1 / cuDNN 8 "devel" base image.
FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-devel

ENV LC_ALL=C.UTF-8

# ARG (not ENV): only affects the build, so apt does not prompt during install.
ARG DEBIAN_FRONTEND=noninteractive

# python 3.10
# Pinned wheel versions. The local-version suffixes (+cuda12.1.torch2.2.0,
# +cu121) mirror the CUDA/torch versions named in the base image tag above.
ARG K2_VERSION="1.24.4.dev20240211+cuda12.1.torch2.2.0"
ARG KALDIFEAT_VERSION="1.25.4.dev20240210+cuda12.1.torch2.2.0"
ARG TORCHAUDIO_VERSION="2.2.0+cu121"

LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
LABEL k2_version=${K2_VERSION}
LABEL kaldifeat_version=${KALDIFEAT_VERSION}
LABEL github_repo="https://github.com/k2-fsa/icefall"

# System build tools and audio utilities. The apt lists are removed in the
# same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        curl \
        vim \
        libssl-dev \
        autoconf \
        automake \
        bzip2 \
        ca-certificates \
        ffmpeg \
        g++ \
        gfortran \
        git \
        libtool \
        make \
        patch \
        sox \
        subversion \
        unzip \
        valgrind \
        wget \
        zlib1g-dev \
        && rm -rf /var/lib/apt/lists/*

# Install dependencies
#
# NOTE: "sentencepiece>=0.1.96" must stay quoted. This RUN uses the shell
# form, and an unquoted `>` is parsed by /bin/sh as output redirection: pip
# would install an unconstrained sentencepiece and write its output to a file
# literally named "=0.1.96".
#
# lhotse is installed from the default branch of its git repository (no pin).
RUN pip install --no-cache-dir \
      torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
      k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      "sentencepiece>=0.1.96" \
      tensorboard \
      typeguard \
      dill \
      onnx \
      onnxruntime \
      onnxmltools \
      multi_quantization \
      numpy \
      pytest \
      graphviz

# Fetch icefall itself (default branch, unpinned) and install its requirements.
RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
    cd /workspace/icefall && \
    pip install --no-cache-dir -r requirements.txt

# Make the cloned checkout importable without installing it as a package.
ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH

WORKDIR /workspace/icefall
|
@ -30,7 +30,7 @@ of language model integration.
|
||||
First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) was first proposed `here <https://arxiv.org/abs/2002.11268>`_
|
||||
to address the language information mismatch between the training
|
||||
corpus (source domain) and the testing corpus (target domain). Assuming that the source domain and the test domain
|
||||
are acoustically similar, DR derives the following formular for decoding with Bayes' theorem:
|
||||
are acoustically similar, DR derives the following formula for decoding with Bayes' theorem:
|
||||
|
||||
.. math::
|
||||
|
||||
@ -41,7 +41,7 @@ are acoustically similar, DR derives the following formular for decoding with Ba
|
||||
|
||||
|
||||
where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively.
|
||||
Here, the source domain LM is trained on the training corpus. The only difference in the above formular compared to
|
||||
Here, the source domain LM is trained on the training corpus. The only difference in the above formula compared to
|
||||
shallow fusion is the subtraction of the source domain LM.
|
||||
|
||||
Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is
|
||||
@ -58,7 +58,7 @@ during decoding for transducer model:
|
||||
|
||||
In LODR, an additional bi-gram LM estimated on the source domain (e.g., the training corpus) is required. Compared to DR,
|
||||
the only difference lies in the choice of source domain LM. According to the original `paper <https://arxiv.org/abs/2203.16776>`_,
|
||||
LODR achieves similar performance compared DR in both intra-domain and cross-domain settings.
|
||||
LODR achieves similar performance compared to DR in both intra-domain and cross-domain settings.
|
||||
As a bi-gram is much faster to evaluate, LODR is usually much faster.
|
||||
|
||||
Now, we will show you how to use LODR in ``icefall``.
|
||||
|
@ -9,9 +9,9 @@ to improve the word-error-rate of a transducer model.
|
||||
|
||||
.. note::
|
||||
|
||||
This tutorial is based on the recipe
|
||||
This tutorial is based on the recipe
|
||||
`pruned_transducer_stateless7_streaming <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_,
|
||||
which is a streaming transducer model trained on `LibriSpeech`_.
|
||||
which is a streaming transducer model trained on `LibriSpeech`_.
|
||||
However, you can easily apply shallow fusion to other recipes.
|
||||
If you encounter any problems, please open an issue here `icefall <https://github.com/k2-fsa/icefall/issues>`_.
|
||||
|
||||
@ -69,11 +69,11 @@ Training a language model usually takes a long time, we can download a pre-train
|
||||
.. code-block:: bash
|
||||
|
||||
$ # download the external LM
|
||||
$ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
||||
$ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
||||
$ # create a symbolic link so that the checkpoint can be loaded
|
||||
$ pushd icefall-librispeech-rnn-lm/exp
|
||||
$ git lfs pull --include "pretrained.pt"
|
||||
$ ln -s pretrained.pt epoch-99.pt
|
||||
$ ln -s pretrained.pt epoch-99.pt
|
||||
$ popd
|
||||
|
||||
.. note::
|
||||
@ -85,7 +85,7 @@ Training a language model usually takes a long time, we can download a pre-train
|
||||
To use shallow fusion for decoding, we can execute the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
||||
$ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
|
||||
$ lm_dir=./icefall-librispeech-rnn-lm/exp
|
||||
$ lm_scale=0.29
|
||||
@ -133,16 +133,16 @@ The decoding result obtained with the above command are shown below.
|
||||
$ For test-other, WER of different settings are:
|
||||
$ beam_size_4 7.08 best for test-other
|
||||
|
||||
The improvement of shallow fusion is very obvious! The relative WER reduction on test-other is around 10.5%.
|
||||
The improvement of shallow fusion is very obvious! The relative WER reduction on test-other is around 10.5%.
|
||||
A few parameters can be tuned to further boost the performance of shallow fusion:
|
||||
|
||||
- ``--lm-scale``
|
||||
- ``--lm-scale``
|
||||
|
||||
Controls the scale of the LM. If too small, the external language model may not be fully utilized; if too large,
|
||||
the LM score may dominant during decoding, leading to bad WER. A typical value of this is around 0.3.
|
||||
Controls the scale of the LM. If too small, the external language model may not be fully utilized; if too large,
|
||||
the LM score might be dominant during decoding, leading to bad WER. A typical value of this is around 0.3.
|
||||
|
||||
- ``--beam-size``
|
||||
|
||||
- ``--beam-size``
|
||||
|
||||
The number of active paths in the search beam. It controls the trade-off between decoding efficiency and accuracy.
|
||||
|
||||
Here, we also show how ``--beam-size`` affects the WER and decoding time:
|
||||
@ -176,4 +176,4 @@ As we see, a larger beam size during shallow fusion improves the WER, but is als
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -34,6 +34,8 @@ which will give you something like below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
"torch2.2.0-cuda12.1"
|
||||
"torch2.2.0-cuda11.8"
|
||||
"torch2.1.0-cuda12.1"
|
||||
"torch2.1.0-cuda11.8"
|
||||
"torch2.0.0-cuda11.7"
|
||||
|
1
egs/ljspeech/TTS/shared
Symbolic link
1
egs/ljspeech/TTS/shared
Symbolic link
@ -0,0 +1 @@
|
||||
../../../icefall/shared/
|
@ -1 +0,0 @@
|
||||
../../../librispeech/ASR/shared/parse_options.sh
|
@ -74,7 +74,7 @@ class Tokenizer(object):
|
||||
if intersperse_blank:
|
||||
token_ids = intersperse(token_ids, self.blank_id)
|
||||
|
||||
token_ids_list.append(token_ids)
|
||||
token_ids_list.append(token_ids)
|
||||
|
||||
return token_ids_list
|
||||
|
||||
@ -103,6 +103,7 @@ class Tokenizer(object):
|
||||
|
||||
if intersperse_blank:
|
||||
token_ids = intersperse(token_ids, self.blank_id)
|
||||
token_ids_list.append(token_ids)
|
||||
|
||||
token_ids_list.append(token_ids)
|
||||
|
||||
return token_ids_list
|
||||
|
@ -159,7 +159,7 @@ class LmScorer(torch.nn.Module):
|
||||
"""
|
||||
if lm_type == "rnn":
|
||||
model = RnnLmModel(
|
||||
vocab_size=params.vocab_size,
|
||||
vocab_size=params.lm_vocab_size,
|
||||
embedding_dim=params.rnn_lm_embedding_dim,
|
||||
hidden_dim=params.rnn_lm_hidden_dim,
|
||||
num_layers=params.rnn_lm_num_layers,
|
||||
@ -183,7 +183,7 @@ class LmScorer(torch.nn.Module):
|
||||
|
||||
elif lm_type == "transformer":
|
||||
model = TransformerLM(
|
||||
vocab_size=params.vocab_size,
|
||||
vocab_size=params.lm_vocab_size,
|
||||
d_model=params.transformer_lm_encoder_dim,
|
||||
embedding_dim=params.transformer_lm_embedding_dim,
|
||||
dim_feedforward=params.transformer_lm_dim_feedforward,
|
||||
|
Loading…
x
Reference in New Issue
Block a user