From 717f98cc83bfa544fc29802d567b630255519496 Mon Sep 17 00:00:00 2001 From: csukuangfj Date: Wed, 16 Aug 2023 15:06:23 +0000 Subject: [PATCH] deploy: fc2df07841b3edbd7bffddfcc2e016515aa75247 --- _sources/for-dummies/data-preparation.rst.txt | 180 ++++++++ _sources/for-dummies/decoding.rst.txt | 39 ++ .../for-dummies/environment-setup.rst.txt | 121 ++++++ _sources/for-dummies/index.rst.txt | 34 ++ _sources/for-dummies/model-export.rst.txt | 310 +++++++++++++ _sources/for-dummies/training.rst.txt | 39 ++ _sources/index.rst.txt | 1 + contributing/code-style.html | 1 + contributing/doc.html | 1 + contributing/how-to-create-a-recipe.html | 1 + contributing/index.html | 1 + decoding-with-langugage-models/LODR.html | 1 + decoding-with-langugage-models/index.html | 1 + decoding-with-langugage-models/rescoring.html | 1 + .../shallow-fusion.html | 1 + docker/index.html | 1 + docker/intro.html | 1 + faqs.html | 1 + for-dummies/data-preparation.html | 299 +++++++++++++ for-dummies/decoding.html | 163 +++++++ for-dummies/environment-setup.html | 238 ++++++++++ for-dummies/index.html | 181 ++++++++ for-dummies/model-export.html | 410 ++++++++++++++++++ for-dummies/training.html | 159 +++++++ genindex.html | 1 + huggingface/index.html | 1 + huggingface/pretrained-models.html | 1 + huggingface/spaces.html | 1 + index.html | 13 +- installation/index.html | 5 +- model-export/export-model-state-dict.html | 1 + model-export/export-ncnn-conv-emformer.html | 1 + model-export/export-ncnn-lstm.html | 1 + model-export/export-ncnn-zipformer.html | 1 + model-export/export-ncnn.html | 1 + model-export/export-onnx.html | 1 + .../export-with-torch-jit-script.html | 1 + model-export/export-with-torch-jit-trace.html | 1 + model-export/index.html | 1 + objects.inv | Bin 1682 -> 1860 bytes .../aishell/conformer_ctc.html | 1 + recipes/Non-streaming-ASR/aishell/index.html | 1 + .../aishell/stateless_transducer.html | 1 + .../aishell/tdnn_lstm_ctc.html | 1 + recipes/Non-streaming-ASR/index.html | 1 + .../librispeech/conformer_ctc.html | 5 +- .../librispeech/distillation.html | 9 +- .../Non-streaming-ASR/librispeech/index.html | 1 + .../pruned_transducer_stateless.html | 5 +- .../librispeech/tdnn_lstm_ctc.html | 1 + .../librispeech/zipformer_ctc_blankskip.html | 1 + .../librispeech/zipformer_mmi.html | 1 + recipes/Non-streaming-ASR/timit/index.html | 1 + .../timit/tdnn_ligru_ctc.html | 1 + .../timit/tdnn_lstm_ctc.html | 1 + recipes/Non-streaming-ASR/yesno/index.html | 1 + recipes/Non-streaming-ASR/yesno/tdnn.html | 1 + recipes/Streaming-ASR/index.html | 1 + recipes/Streaming-ASR/introduction.html | 1 + recipes/Streaming-ASR/librispeech/index.html | 1 + .../lstm_pruned_stateless_transducer.html | 5 +- .../pruned_transducer_stateless.html | 5 +- .../librispeech/zipformer_transducer.html | 1 + recipes/index.html | 1 + search.html | 1 + searchindex.js | 2 +- 66 files changed, 2250 insertions(+), 17 deletions(-) create mode 100644 _sources/for-dummies/data-preparation.rst.txt create mode 100644 _sources/for-dummies/decoding.rst.txt create mode 100644 _sources/for-dummies/environment-setup.rst.txt create mode 100644 _sources/for-dummies/index.rst.txt create mode 100644 _sources/for-dummies/model-export.rst.txt create mode 100644 _sources/for-dummies/training.rst.txt create mode 100644 for-dummies/data-preparation.html create mode 100644 for-dummies/decoding.html create mode 100644 for-dummies/environment-setup.html create mode 100644 for-dummies/index.html create mode 100644 for-dummies/model-export.html create mode 100644 
for-dummies/training.html diff --git a/_sources/for-dummies/data-preparation.rst.txt b/_sources/for-dummies/data-preparation.rst.txt new file mode 100644 index 000000000..f03d44e79 --- /dev/null +++ b/_sources/for-dummies/data-preparation.rst.txt @@ -0,0 +1,180 @@ +.. _dummies_tutorial_data_preparation: + +Data Preparation +================ + +After :ref:`dummies_tutorial_environment_setup`, we can start preparing the +data for training and decoding. + +The first step is to run the provided script +`prepare.sh `_, +which prepares everything required for training and decoding. + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + ./prepare.sh + +Note that every recipe in `icefall`_ contains a file ``prepare.sh``, +which you should run before anything else. + +That is all you need for data preparation. + +For the more curious +-------------------- + +If you are wondering how to prepare your own dataset, please refer to the following +URLs for more details: + + - ``_ + + It contains recipes for a variety of datasets. If you want to add your own + dataset, please read the recipes in this folder first. + + - ``_ + + The `yesno`_ recipe in `lhotse`_. + +If you already have a `Kaldi`_ dataset directory, which contains files like +``wav.scp`` and ``feats.scp``, you can refer to ``_. + +A quick look at the generated files +----------------------------------- + +``./prepare.sh`` puts generated files into two directories: + + - ``download`` + - ``data`` + +download +^^^^^^^^ + +The ``download`` directory contains downloaded dataset files: + +.. code-block:: bash + + tree -L 1 ./download/ + + ./download/ + |-- waves_yesno + `-- waves_yesno.tar.gz + +.. hint:: + + Please refer to ``_ + for how the data is downloaded and extracted. + +data +^^^^ + +.. code-block:: bash + + tree ./data/ + + ./data/ + |-- fbank + | |-- yesno_cuts_test.jsonl.gz + | |-- yesno_cuts_train.jsonl.gz + | |-- yesno_feats_test.lca + | `-- yesno_feats_train.lca + |-- lang_phone + | |-- HLG.pt + | |-- L.pt + | |-- L_disambig.pt + | |-- Linv.pt + | |-- lexicon.txt + | |-- lexicon_disambig.txt + | |-- tokens.txt + | `-- words.txt + |-- lm + | |-- G.arpa + | `-- G.fst.txt + `-- manifests + |-- yesno_recordings_test.jsonl.gz + |-- yesno_recordings_train.jsonl.gz + |-- yesno_supervisions_test.jsonl.gz + `-- yesno_supervisions_train.jsonl.gz + + 4 directories, 18 files + +**data/manifests**: + + This directory contains manifests. They are used to generate files in + ``data/fbank``. + + To give you an idea of what it contains, we examine the first few lines of + the manifests related to the ``train`` dataset. + + .. code-block:: bash + + cd data/manifests + gunzip -c yesno_recordings_train.jsonl.gz | head -n 3 + + The output is given below: + + .. 
code-block:: bash + + {"id": "0_0_0_0_1_1_1_1", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_0_1_1_1_1.wav"}], "sampling_rate": 8000, "num_samples": 50800, "duration": 6.35, "channel_ids": [0]} + {"id": "0_0_0_1_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_1_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48880, "duration": 6.11, "channel_ids": [0]} + {"id": "0_0_1_0_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_1_0_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48160, "duration": 6.02, "channel_ids": [0]} + + Please refer to ``_ + for the meaning of each field per line. + + .. code-block:: bash + + gunzip -c yesno_supervisions_train.jsonl.gz | head -n 3 + + The output is given below: + + .. code-block:: bash + + {"id": "0_0_0_0_1_1_1_1", "recording_id": "0_0_0_0_1_1_1_1", "start": 0.0, "duration": 6.35, "channel": 0, "text": "NO NO NO NO YES YES YES YES", "language": "Hebrew"} + {"id": "0_0_0_1_0_1_1_0", "recording_id": "0_0_0_1_0_1_1_0", "start": 0.0, "duration": 6.11, "channel": 0, "text": "NO NO NO YES NO YES YES NO", "language": "Hebrew"} + {"id": "0_0_1_0_0_1_1_0", "recording_id": "0_0_1_0_0_1_1_0", "start": 0.0, "duration": 6.02, "channel": 0, "text": "NO NO YES NO NO YES YES NO", "language": "Hebrew"} + + Please refer to ``_ + for the meaning of each field per line. + +**data/fbank**: + + This directory contains everything from ``data/manifests``. It also contains the features + used for training. + + ``data/fbank/yesno_feats_train.lca`` contains the features for the train dataset. + Features are compressed using `lilcom`_. + + ``data/fbank/yesno_cuts_train.jsonl.gz`` stores the `CutSet `_, + which stores `RecordingSet `_, + `SupervisionSet `_, + and `FeatureSet `_. + + To give you an idea of what it looks like, we can run the following command: + + .. code-block:: bash + + cd data/fbank + + gunzip -c yesno_cuts_train.jsonl.gz | head -n 3 + + The output is given below: + + .. 
code-block:: bash + + {"id": "0_0_0_0_1_1_1_1-0", "start": 0, "duration": 6.35, "channel": 0, "supervisions": [{"id": "0_0_0_0_1_1_1_1", "recording_id": "0_0_0_0_1_1_1_1", "start": 0.0, "duration": 6.35, "channel": 0, "text": "NO NO NO NO YES YES YES YES", "language": "Hebrew"}], "features": {"type": "kaldi-fbank", "num_frames": 635, "num_features": 23, "frame_shift": 0.01, "sampling_rate": 8000, "start": 0, "duration": 6.35, "storage_type": "lilcom_chunky", "storage_path": "data/fbank/yesno_feats_train.lca", "storage_key": "0,13000,3570", "channels": 0}, "recording": {"id": "0_0_0_0_1_1_1_1", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_0_1_1_1_1.wav"}], "sampling_rate": 8000, "num_samples": 50800, "duration": 6.35, "channel_ids": [0]}, "type": "MonoCut"} + {"id": "0_0_0_1_0_1_1_0-1", "start": 0, "duration": 6.11, "channel": 0, "supervisions": [{"id": "0_0_0_1_0_1_1_0", "recording_id": "0_0_0_1_0_1_1_0", "start": 0.0, "duration": 6.11, "channel": 0, "text": "NO NO NO YES NO YES YES NO", "language": "Hebrew"}], "features": {"type": "kaldi-fbank", "num_frames": 611, "num_features": 23, "frame_shift": 0.01, "sampling_rate": 8000, "start": 0, "duration": 6.11, "storage_type": "lilcom_chunky", "storage_path": "data/fbank/yesno_feats_train.lca", "storage_key": "16570,12964,2929", "channels": 0}, "recording": {"id": "0_0_0_1_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_0_1_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48880, "duration": 6.11, "channel_ids": [0]}, "type": "MonoCut"} + {"id": "0_0_1_0_0_1_1_0-2", "start": 0, "duration": 6.02, "channel": 0, "supervisions": [{"id": "0_0_1_0_0_1_1_0", "recording_id": "0_0_1_0_0_1_1_0", "start": 0.0, "duration": 6.02, "channel": 0, "text": "NO NO YES NO NO YES YES NO", "language": "Hebrew"}], "features": {"type": "kaldi-fbank", "num_frames": 602, "num_features": 23, "frame_shift": 0.01, "sampling_rate": 8000, "start": 0, "duration": 6.02, "storage_type": "lilcom_chunky", "storage_path": "data/fbank/yesno_feats_train.lca", "storage_key": "32463,12936,2696", "channels": 0}, "recording": {"id": "0_0_1_0_0_1_1_0", "sources": [{"type": "file", "channels": [0], "source": "/tmp/icefall/egs/yesno/ASR/download/waves_yesno/0_0_1_0_0_1_1_0.wav"}], "sampling_rate": 8000, "num_samples": 48160, "duration": 6.02, "channel_ids": [0]}, "type": "MonoCut"} + + Note that ``yesno_cuts_train.jsonl.gz`` stores only the information about how to read the features. + The actual features are stored separately in ``data/fbank/yesno_feats_train.lca``. + +**data/lang_phone**: + + This directory contains the lexicon. + +**data/lm**: + + This directory contains language models. diff --git a/_sources/for-dummies/decoding.rst.txt b/_sources/for-dummies/decoding.rst.txt new file mode 100644 index 000000000..3e48e8bfd --- /dev/null +++ b/_sources/for-dummies/decoding.rst.txt @@ -0,0 +1,39 @@ +.. _dummies_tutorial_decoding: + +Decoding +======== + +After :ref:`dummies_tutorial_training`, we can start decoding. + +The command to start the decoding is quite simple: + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # We use CPU for decoding by setting the following environment variable + export CUDA_VISIBLE_DEVICES="" + + ./tdnn/decode.py + +The output logs are given below: + +.. literalinclude:: ./code/decoding-yesno.txt + +For the more curious +-------------------- + +.. 
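code-block:: bash
+
+   # A hedged illustration, not a recommendation from the recipe: it reuses
+   # the "--epoch 14 --avg 2" combination that appears later in this
+   # tutorial, decoding with the average of the epoch-13 and epoch-14
+   # checkpoints.
+   ./tdnn/decode.py --epoch 14 --avg 2
+
+The combination above is only an illustration; it is not necessarily the best
+choice for your own run. Running
+
+.. 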
code-block:: bash + + ./tdnn/decode.py --help + +will print the usage information about ``./tdnn/decode.py``. For instance, you +can specify: + + - ``--epoch`` to select which checkpoint to use for decoding + - ``--avg`` to select how many checkpoints to use for model averaging + +You usually try different combinations of ``--epoch`` and ``--avg`` and select +one that leads to the lowest WER (`Word Error Rate `_). diff --git a/_sources/for-dummies/environment-setup.rst.txt b/_sources/for-dummies/environment-setup.rst.txt new file mode 100644 index 000000000..0cb8ecc1d --- /dev/null +++ b/_sources/for-dummies/environment-setup.rst.txt @@ -0,0 +1,121 @@ +.. _dummies_tutorial_environment_setup: + +Environment setup +================= + +We will create an environment for `Next-gen Kaldi`_ that runs on ``CPU`` +in this tutorial. + +.. note:: + + Since the `yesno`_ dataset used in this tutorial is very tiny, training on + ``CPU`` works very well for it. + + If your dataset is very large, e.g., hundreds or thousands of hours of + training data, please follow :ref:`install icefall` to install `icefall`_ + that works with ``GPU``. + + +Create a virtual environment +---------------------------- + +.. code-block:: bash + + virtualenv -p python3 /tmp/icefall_env + +The above command creates a virtual environment in the directory ``/tmp/icefall_env``. +You can select any directory you want. + +The output of the above command is given below: + +.. code-block:: bash + + Already using interpreter /usr/bin/python3 + Using base prefix '/usr' + New python executable in /tmp/icefall_env/bin/python3 + Also creating executable in /tmp/icefall_env/bin/python + Installing setuptools, pkg_resources, pip, wheel...done. + +Now we can activate the environment using: + +.. code-block:: bash + + source /tmp/icefall_env/bin/activate + +Install dependencies +-------------------- + +.. warning:: + + Remember to activate your virtual environment before you continue! + +After activating the virtual environment, we can use the following command +to install dependencies of `icefall`_: + +.. hint:: + + Remember that we will run this tutorial on ``CPU``, so we install only the + dependencies required for running on ``CPU``. + +.. code-block:: bash + + # Caution: Installation order matters! + + # We use torch 2.0.0 and torchaudio 2.0.0 in this tutorial. + # Other versions should also work. + + pip install torch==2.0.0+cpu torchaudio==2.0.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + + # If you are using macOS or Windows, please use the following command to install torch and torchaudio + # pip install torch==2.0.0 torchaudio==2.0.0 -f https://download.pytorch.org/whl/torch_stable.html + + # Now install k2 + # Please refer to https://k2-fsa.github.io/k2/installation/from_wheels.html#linux-cpu-example + + pip install k2==1.24.3.dev20230726+cpu.torch2.0.0 -f https://k2-fsa.github.io/k2/cpu.html + + # Install the latest version of lhotse + + pip install git+https://github.com/lhotse-speech/lhotse + + +Install icefall +--------------- + +We will put the source code of `icefall`_ into the directory ``/tmp``. +You can select any directory you want. + +.. code-block:: bash + + cd /tmp + git clone https://github.com/k2-fsa/icefall + cd icefall + pip install -r ./requirements.txt + +.. code-block:: bash + + # Anytime we want to use icefall, we have to set the following + # environment variable + + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + +.. hint:: + + If you get the following error during this tutorial: + + .. 
code-block:: bash + + ModuleNotFoundError: No module named 'icefall' + + please set the above environment variable to fix it. + + +Congratulations! You have installed `icefall`_ successfully. + +For the more curious +-------------------- + +`icefall`_ is a collection of Python scripts, so you don't need to +use ``python3 setup.py install`` or ``pip install icefall`` to install it. +All you need to do is download the code and set the environment variable +``PYTHONPATH``. diff --git a/_sources/for-dummies/index.rst.txt b/_sources/for-dummies/index.rst.txt new file mode 100644 index 000000000..7c0a3d8ee --- /dev/null +++ b/_sources/for-dummies/index.rst.txt @@ -0,0 +1,34 @@ +Icefall for dummies tutorial +============================ + +This tutorial walks you step by step through creating a simple +ASR (`Automatic Speech Recognition `_) +system with `Next-gen Kaldi`_. + +We use the `yesno`_ dataset for demonstration. We selected it for two reasons: + + - It is quite tiny, containing only about 12 minutes of data. + - The training can be finished within 20 seconds on ``CPU``. + +That also means you don't need a ``GPU`` to run this tutorial. + +Let's get started! + +Please follow the items below **sequentially**. + +.. note:: + + The :ref:`dummies_tutorial_data_preparation` runs only on Linux and macOS. + All other parts run on Linux, macOS, and Windows. + + Community help with porting the :ref:`dummies_tutorial_data_preparation` + to Windows is appreciated. + +.. toctree:: + :maxdepth: 2 + + ./environment-setup.rst + ./data-preparation.rst + ./training.rst + ./decoding.rst + ./model-export.rst diff --git a/_sources/for-dummies/model-export.rst.txt b/_sources/for-dummies/model-export.rst.txt new file mode 100644 index 000000000..079ebc712 --- /dev/null +++ b/_sources/for-dummies/model-export.rst.txt @@ -0,0 +1,310 @@ +Model Export +============ + +There are three ways to export a pre-trained model. + + - Export the model parameters via `model.state_dict() `_ + - Export via `torchscript `_: either `torch.jit.script() `_ or `torch.jit.trace() `_ + - Export to `ONNX`_ via `torch.onnx.export() `_ + +Each method is explained below in detail. + +Export the model parameters via model.state_dict() +--------------------------------------------------- + +The command for this kind of export is + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # assume that "--epoch 14 --avg 2" produces the lowest WER. + + ./tdnn/export.py --epoch 14 --avg 2 + +The output logs are given below: + +.. code-block:: bash + + 2023-08-16 20:42:03,912 INFO [export.py:76] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'jit': False} + 2023-08-16 20:42:03,913 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:42:03,950 INFO [export.py:93] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + 2023-08-16 20:42:03,971 INFO [export.py:106] Not using torch.jit.script + 2023-08-16 20:42:03,974 INFO [export.py:111] Saved to tdnn/exp/pretrained.pt + +We can see from the logs that the exported model is saved to the file ``tdnn/exp/pretrained.pt``. 
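+
+If you are curious about what the checkpoint averaging behind ``--avg 2``
+does, the following is a minimal sketch of the idea, assuming a plain
+element-wise average of the parameters stored in the two checkpoints. It is
+not the actual code used by ``./tdnn/export.py``, which relies on icefall's
+checkpoint helpers and handles corner cases this sketch ignores:
+
+.. code-block:: python
+
+   # A hedged sketch of checkpoint averaging, not icefall's actual code.
+   import torch
+
+   s1 = torch.load("tdnn/exp/epoch-13.pt", map_location="cpu")["model"]
+   s2 = torch.load("tdnn/exp/epoch-14.pt", map_location="cpu")["model"]
+
+   avg = {}
+   for k in s1:
+       if s1[k].is_floating_point():
+           # Average weights, biases, and batchnorm running statistics.
+           avg[k] = (s1[k] + s2[k]) / 2
+       else:
+           # Integer entries such as num_batches_tracked cannot be
+           # meaningfully averaged; keep the value from the newest epoch.
+           avg[k] = s2[k]
+
+   torch.save({"model": avg}, "tdnn/exp/averaged-sketch.pt")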
+ +To give you an idea of what ``tdnn/exp/pretrained.pt`` contains, we can use the following command: + +.. code-block:: python3 + + >>> import torch + >>> m = torch.load("tdnn/exp/pretrained.pt") + >>> list(m.keys()) + ['model'] + >>> list(m["model"].keys()) + ['tdnn.0.weight', 'tdnn.0.bias', 'tdnn.2.running_mean', 'tdnn.2.running_var', 'tdnn.2.num_batches_tracked', 'tdnn.3.weight', 'tdnn.3.bias', 'tdnn.5.running_mean', 'tdnn.5.running_var', 'tdnn.5.num_batches_tracked', 'tdnn.6.weight', 'tdnn.6.bias', 'tdnn.8.running_mean', 'tdnn.8.running_var', 'tdnn.8.num_batches_tracked', 'output_linear.weight', 'output_linear.bias'] + +We can use ``tdnn/exp/pretrained.pt`` in the following way with ``./tdnn/decode.py``: + +.. code-block:: bash + + cd tdnn/exp + ln -s pretrained.pt epoch-99.pt + cd ../.. + + ./tdnn/decode.py --epoch 99 --avg 1 + +The output logs of the above command are given below: + +.. code-block:: bash + + 2023-08-16 20:45:48,089 INFO [decode.py:262] Decoding started + 2023-08-16 20:45:48,090 INFO [decode.py:263] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 99, 'avg': 1, 'export': False, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': False, 'k2-git-sha1': 'ad79f1c699c684de9785ed6ca5edb805a41f78c3', 'k2-git-date': 'Wed Jul 26 09:30:42 2023', 'lhotse-version': '1.16.0.dev+git.aa073f6.clean', 'torch-version': '2.0.0', 'torch-cuda-available': False, 'torch-cuda-version': None, 'python-version': '3.1', 'icefall-git-branch': 'master', 'icefall-git-sha1': '9a47c08-clean', 'icefall-git-date': 'Mon Aug 14 22:10:50 2023', 'icefall-path': '/private/tmp/icefall', 'k2-path': '/private/tmp/icefall_env/lib/python3.11/site-packages/k2/__init__.py', 'lhotse-path': '/private/tmp/icefall_env/lib/python3.11/site-packages/lhotse/__init__.py', 'hostname': 'fangjuns-MacBook-Pro.local', 'IP address': '127.0.0.1'}} + 2023-08-16 20:45:48,092 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:45:48,103 INFO [decode.py:272] device: cpu + 2023-08-16 20:45:48,109 INFO [checkpoint.py:112] Loading checkpoint from tdnn/exp/epoch-99.pt + 2023-08-16 20:45:48,115 INFO [asr_datamodule.py:218] About to get test cuts + 2023-08-16 20:45:48,115 INFO [asr_datamodule.py:253] About to get test cuts + 2023-08-16 20:45:50,386 INFO [decode.py:203] batch 0/?, cuts processed until now is 4 + 2023-08-16 20:45:50,556 INFO [decode.py:240] The transcripts are stored in tdnn/exp/recogs-test_set.txt + 2023-08-16 20:45:50,557 INFO [utils.py:564] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ] + 2023-08-16 20:45:50,558 INFO [decode.py:248] Wrote detailed error stats to tdnn/exp/errs-test_set.txt + 2023-08-16 20:45:50,559 INFO [decode.py:315] Done! + +We can see that it produces the same WER as before. + +We can also use it to decode files with the following command: + +.. 
code-block:: bash + + # ./tdnn/pretrained.py requires kaldifeat + # + # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html + # for how to install kaldifeat + + pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + + ./tdnn/pretrained.py \ --checkpoint ./tdnn/exp/pretrained.pt \ --HLG ./data/lang_phone/HLG.pt \ --words-file ./data/lang_phone/words.txt \ download/waves_yesno/0_0_0_1_0_0_0_1.wav \ download/waves_yesno/0_0_1_0_0_0_1_0.wav + +The output is given below: + +.. code-block:: bash + + 2023-08-16 20:53:19,208 INFO [pretrained.py:136] {'feature_dim': 23, 'num_classes': 4, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'checkpoint': './tdnn/exp/pretrained.pt', 'words_file': './data/lang_phone/words.txt', 'HLG': './data/lang_phone/HLG.pt', 'sound_files': ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav']} + 2023-08-16 20:53:19,208 INFO [pretrained.py:142] device: cpu + 2023-08-16 20:53:19,208 INFO [pretrained.py:144] Creating model + 2023-08-16 20:53:19,212 INFO [pretrained.py:156] Loading HLG from ./data/lang_phone/HLG.pt + 2023-08-16 20:53:19,213 INFO [pretrained.py:160] Constructing Fbank computer + 2023-08-16 20:53:19,213 INFO [pretrained.py:170] Reading sound files: ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav'] + 2023-08-16 20:53:19,224 INFO [pretrained.py:176] Decoding started + 2023-08-16 20:53:19,304 INFO [pretrained.py:212] + download/waves_yesno/0_0_0_1_0_0_0_1.wav: + NO NO NO YES NO NO NO YES + + download/waves_yesno/0_0_1_0_0_0_1_0.wav: + NO NO YES NO NO NO YES NO + + + 2023-08-16 20:53:19,304 INFO [pretrained.py:214] Decoding Done + + +Export via torch.jit.script() +----------------------------- + +The command for this kind of export is + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # assume that "--epoch 14 --avg 2" produces the lowest WER. + + ./tdnn/export.py --epoch 14 --avg 2 --jit true + +The output logs are given below: + +.. code-block:: bash + + 2023-08-16 20:47:44,666 INFO [export.py:76] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'jit': True} + 2023-08-16 20:47:44,667 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:47:44,670 INFO [export.py:93] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + 2023-08-16 20:47:44,677 INFO [export.py:100] Using torch.jit.script + 2023-08-16 20:47:44,843 INFO [export.py:104] Saved to tdnn/exp/cpu_jit.pt + +From the output logs we can see that the generated file is saved to ``tdnn/exp/cpu_jit.pt``. + +Don't be confused by the name ``cpu_jit.pt``. The ``cpu`` part means the model was moved to +CPU before being exported. That means that when you load it with: + +.. code-block:: python + + torch.jit.load() + +you don't need to specify the argument `map_location `_ +and the model resides on CPU by default. + +To use ``tdnn/exp/cpu_jit.pt`` with `icefall`_ to decode files, we can use: + +.. 
code-block:: bash + + # ./tdnn/jit_pretrained.py requires kaldifeat + # + # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html + # for how to install kaldifeat + + pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + + + ./tdnn/jit_pretrained.py \ + --nn-model ./tdnn/exp/cpu_jit.pt \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +The output is given below: + +.. code-block:: bash + + 2023-08-16 20:56:00,603 INFO [jit_pretrained.py:121] {'feature_dim': 23, 'num_classes': 4, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'nn_model': './tdnn/exp/cpu_jit.pt', 'words_file': './data/lang_phone/words.txt', 'HLG': './data/lang_phone/HLG.pt', 'sound_files': ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav']} + 2023-08-16 20:56:00,603 INFO [jit_pretrained.py:127] device: cpu + 2023-08-16 20:56:00,603 INFO [jit_pretrained.py:129] Loading torchscript model + 2023-08-16 20:56:00,640 INFO [jit_pretrained.py:134] Loading HLG from ./data/lang_phone/HLG.pt + 2023-08-16 20:56:00,641 INFO [jit_pretrained.py:138] Constructing Fbank computer + 2023-08-16 20:56:00,641 INFO [jit_pretrained.py:148] Reading sound files: ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav'] + 2023-08-16 20:56:00,642 INFO [jit_pretrained.py:154] Decoding started + 2023-08-16 20:56:00,727 INFO [jit_pretrained.py:190] + download/waves_yesno/0_0_0_1_0_0_0_1.wav: + NO NO NO YES NO NO NO YES + + download/waves_yesno/0_0_1_0_0_0_1_0.wav: + NO NO YES NO NO NO YES NO + + + 2023-08-16 20:56:00,727 INFO [jit_pretrained.py:192] Decoding Done + +.. hint:: + + We provide only code for ``torch.jit.script()``. You can try ``torch.jit.trace()`` + if you want. + +Export via torch.onnx.export() +------------------------------ + +The command for this kind of export is + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # tdnn/export_onnx.py requires onnx and onnxruntime + pip install onnx onnxruntime + + # assume that "--epoch 14 --avg 2" produces the lowest WER. + + ./tdnn/export_onnx.py \ + --epoch 14 \ + --avg 2 + +The output logs are given below: + +.. 
code-block:: bash + + 2023-08-16 20:59:20,888 INFO [export_onnx.py:83] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_double_scores': True, 'epoch': 14, 'avg': 2} + 2023-08-16 20:59:20,888 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt + 2023-08-16 20:59:20,892 INFO [export_onnx.py:100] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt'] + ================ Diagnostic Run torch.onnx.export version 2.0.0 ================ + verbose: False, log level: Level.ERROR + ======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ======================== + + 2023-08-16 20:59:21,047 INFO [export_onnx.py:127] Saved to tdnn/exp/model-epoch-14-avg-2.onnx + 2023-08-16 20:59:21,047 INFO [export_onnx.py:136] meta_data: {'model_type': 'tdnn', 'version': '1', 'model_author': 'k2-fsa', 'comment': 'non-streaming tdnn for the yesno recipe', 'vocab_size': 4} + 2023-08-16 20:59:21,049 INFO [export_onnx.py:140] Generate int8 quantization models + 2023-08-16 20:59:21,075 INFO [onnx_quantizer.py:538] Quantization parameters for tensor:"/Transpose_1_output_0" not specified + 2023-08-16 20:59:21,081 INFO [export_onnx.py:151] Saved to tdnn/exp/model-epoch-14-avg-2.int8.onnx + +We can see from the logs that it generates two files: + + - ``tdnn/exp/model-epoch-14-avg-2.onnx`` (ONNX model with ``float32`` weights) + - ``tdnn/exp/model-epoch-14-avg-2.int8.onnx`` (ONNX model with ``int8`` weights) + +To use the generated ONNX model files for decoding with `onnxruntime`_, we can use + +.. code-block:: bash + + # ./tdnn/onnx_pretrained.py requires kaldifeat + # + # Please refer to https://csukuangfj.github.io/kaldifeat/installation/from_wheels.html + # for how to install kaldifeat + + pip install kaldifeat==1.25.0.dev20230726+cpu.torch2.0.0 -f https://csukuangfj.github.io/kaldifeat/cpu.html + + ./tdnn/onnx_pretrained.py \ + --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \ + --HLG ./data/lang_phone/HLG.pt \ + --words-file ./data/lang_phone/words.txt \ + download/waves_yesno/0_0_0_1_0_0_0_1.wav \ + download/waves_yesno/0_0_1_0_0_0_1_0.wav + +The output is given below: + +.. 
code-block:: bash + + 2023-08-16 21:03:24,260 INFO [onnx_pretrained.py:166] {'feature_dim': 23, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'nn_model': './tdnn/exp/model-epoch-14-avg-2.onnx', 'words_file': './data/lang_phone/words.txt', 'HLG': './data/lang_phone/HLG.pt', 'sound_files': ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav']} + 2023-08-16 21:03:24,260 INFO [onnx_pretrained.py:171] device: cpu + 2023-08-16 21:03:24,260 INFO [onnx_pretrained.py:173] Loading onnx model ./tdnn/exp/model-epoch-14-avg-2.onnx + 2023-08-16 21:03:24,267 INFO [onnx_pretrained.py:176] Loading HLG from ./data/lang_phone/HLG.pt + 2023-08-16 21:03:24,270 INFO [onnx_pretrained.py:180] Constructing Fbank computer + 2023-08-16 21:03:24,273 INFO [onnx_pretrained.py:190] Reading sound files: ['download/waves_yesno/0_0_0_1_0_0_0_1.wav', 'download/waves_yesno/0_0_1_0_0_0_1_0.wav'] + 2023-08-16 21:03:24,279 INFO [onnx_pretrained.py:196] Decoding started + 2023-08-16 21:03:24,318 INFO [onnx_pretrained.py:232] + download/waves_yesno/0_0_0_1_0_0_0_1.wav: + NO NO NO YES NO NO NO YES + + download/waves_yesno/0_0_1_0_0_0_1_0.wav: + NO NO YES NO NO NO YES NO + + + 2023-08-16 21:03:24,318 INFO [onnx_pretrained.py:234] Decoding Done + +.. note:: + + To use the ``int8`` ONNX model for decoding, please use: + + .. code-block:: bash + + ./tdnn/onnx_pretrained.py \ --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \ --HLG ./data/lang_phone/HLG.pt \ --words-file ./data/lang_phone/words.txt \ download/waves_yesno/0_0_0_1_0_0_0_1.wav \ download/waves_yesno/0_0_1_0_0_0_1_0.wav + +For the more curious +-------------------- + +If you are wondering how to deploy the model without ``torch``, please +continue reading. We will show how to use `sherpa-onnx`_ to run the +exported ONNX models, which depends only on `onnxruntime`_ and does not +depend on ``torch``. + +In this tutorial, we will only demonstrate the usage of `sherpa-onnx`_ with the +pre-trained model of the `yesno`_ recipe. There are also two other frameworks +available: + + - `sherpa`_. It works with torchscript models. + - `sherpa-ncnn`_. It works with models exported using :ref:`icefall_export_to_ncnn` with `ncnn`_. + +Please see ``_ for further details. diff --git a/_sources/for-dummies/training.rst.txt b/_sources/for-dummies/training.rst.txt new file mode 100644 index 000000000..816ef2d3b --- /dev/null +++ b/_sources/for-dummies/training.rst.txt @@ -0,0 +1,39 @@ +.. _dummies_tutorial_training: + +Training +======== + +After :ref:`dummies_tutorial_data_preparation`, we can start training. + +The command to start the training is quite simple: + +.. code-block:: bash + + cd /tmp/icefall + export PYTHONPATH=/tmp/icefall:$PYTHONPATH + cd egs/yesno/ASR + + # We use CPU for training by setting the following environment variable + export CUDA_VISIBLE_DEVICES="" + + ./tdnn/train.py + +That's it! + +You can find the training logs below: + +.. literalinclude:: ./code/train-yesno.txt + +For the more curious +-------------------- + +.. code-block:: bash + + ./tdnn/train.py --help + +will print the usage information about ``./tdnn/train.py``. For instance, you +can specify the number of epochs to train and the location to save the training +results. + +The training text logs are saved in ``tdnn/exp/log`` while the tensorboard +logs are in ``tdnn/exp/tensorboard``. 
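+
+To view the tensorboard logs, you can run `tensorboard
+`_ (assuming you have
+installed it, e.g., via ``pip install tensorboard``):
+
+.. code-block:: bash
+
+   # Point tensorboard at the directory written during training and open
+   # the printed URL (usually http://localhost:6006) in a browser.
+   tensorboard --logdir tdnn/exp/tensorboard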
diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt index 0fa8fdd1c..fb539d3f2 100644 --- a/_sources/index.rst.txt +++ b/_sources/index.rst.txt @@ -20,6 +20,7 @@ speech recognition recipes using `k2 `_. :maxdepth: 2 :caption: Contents: + for-dummies/index.rst installation/index docker/index faqs diff --git a/contributing/code-style.html b/contributing/code-style.html index 603669e06..a0ce2b2dd 100644 --- a/contributing/code-style.html +++ b/contributing/code-style.html @@ -44,6 +44,7 @@
diff --git a/installation/index.html b/installation/index.html index a2dd8cd3b..05a5f1e8f 100644 --- a/installation/index.html +++ b/installation/index.html @@ -20,7 +20,7 @@ @@ -44,6 +44,7 @@ diff --git a/model-export/export-model-state-dict.html b/model-export/export-model-state-dict.html index b47f85b9f..48b763f97 100644 --- a/model-export/export-model-state-dict.html +++ b/model-export/export-model-state-dict.html @@ -44,6 +44,7 @@

Please see the following screenshot for the output of an example execution.

Fig. 6 Downloading codebook indexes and preparing training manifest.

@@ -245,10 +246,10 @@ set use_extracted_c num_codebooks by yourself.

Now, you should see the following files under the directory ./data/vq_fbank_layer36_cb8.

Fig. 7 MVQ-augmented training manifests.

Voila! You are ready to perform knowledge distillation training now!

diff --git a/recipes/Non-streaming-ASR/librispeech/index.html b/recipes/Non-streaming-ASR/librispeech/index.html index fdf26a2a0..0799c43f6 100644 --- a/recipes/Non-streaming-ASR/librispeech/index.html +++ b/recipes/Non-streaming-ASR/librispeech/index.html @@ -44,6 +44,7 @@