diff --git a/.buildinfo b/.buildinfo
index 46f8d58fb..d727d6619 100644
--- a/.buildinfo
+++ b/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: cfc3e6ecc44ed7573f700065af8738a7
+config: 3ca2e66d59e42ffdb5e0a5ba2153f99e
 tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/_images/librispeech-lstm-transducer-tensorboard-log.png b/_images/librispeech-lstm-transducer-tensorboard-log.png
new file mode 100644
index 000000000..cc475a45f
Binary files /dev/null and b/_images/librispeech-lstm-transducer-tensorboard-log.png differ
diff --git a/_sources/recipes/librispeech/index.rst.txt b/_sources/recipes/librispeech/index.rst.txt
index 5fa08ab6b..6c91b6750 100644
--- a/_sources/recipes/librispeech/index.rst.txt
+++ b/_sources/recipes/librispeech/index.rst.txt
@@ -6,3 +6,4 @@ LibriSpeech
 
    tdnn_lstm_ctc
    conformer_ctc
+   lstm_pruned_stateless_transducer
diff --git a/_sources/recipes/librispeech/lstm_pruned_stateless_transducer.rst.txt b/_sources/recipes/librispeech/lstm_pruned_stateless_transducer.rst.txt
new file mode 100644
index 000000000..0aeccb70a
--- /dev/null
+++ b/_sources/recipes/librispeech/lstm_pruned_stateless_transducer.rst.txt
@@ -0,0 +1,625 @@
+Transducer
+==========
+
+.. hint::
+
+   Please scroll down to the bottom of this page to find download links
+   for pretrained models if you don't want to train a model from scratch.
+
+
+This tutorial shows you how to train a transducer model
+with the `LibriSpeech <https://www.openslr.org/12>`_ dataset.
+
+We use pruned RNN-T to compute the loss.
+
+.. note::
+
+   You can find the paper about pruned RNN-T at the following address:
+
+   `<https://arxiv.org/abs/2206.13236>`_
+
+The transducer model consists of 3 parts:
+
+  - Encoder, a.k.a. the transcriber. We use an LSTM model.
+  - Decoder, a.k.a. the predictor. We use a model consisting of ``nn.Embedding``
+    and ``nn.Conv1d``.
+  - Joiner, a.k.a. the joint network.
+
+.. caution::
+
+   Contrary to the conventional RNN-T models, we use a stateless decoder.
+   That is, it has no recurrent connections.
+
+.. hint::
+
+   Since the encoder model is an LSTM, not Transformer/Conformer, the
+   resulting model is suitable for streaming/online ASR.
+
+
+Which model to use
+------------------
+
+Currently, there are two folders about LSTM stateless transducer training:
+
+  - ``(1)`` `<https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless>`_
+
+    This recipe uses only LibriSpeech during training.
+
+  - ``(2)`` `<https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2>`_
+
+    This recipe uses GigaSpeech + LibriSpeech during training.
+
+``(1)`` and ``(2)`` use the same model architecture. The only difference is that ``(2)`` supports
+multi-dataset. Since ``(2)`` uses more data, it has a lower WER than ``(1)`` but it needs
+more training time.
+
+We use ``lstm_transducer_stateless2`` as an example below.
+
+.. note::
+
+   You need to download the `GigaSpeech <https://github.com/SpeechColab/GigaSpeech>`_ dataset
+   to run ``(2)``. If you have only ``LibriSpeech`` dataset available, feel free to use ``(1)``.
+
+Data preparation
+----------------
+
+.. code-block:: bash
+
+  $ cd egs/librispeech/ASR
+  $ ./prepare.sh
+
+  # If you use (1), you can **skip** the following command
+  $ ./prepare_giga_speech.sh
+
+The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
+All you need to do is to run it.
+
+The data preparation contains several stages. You can use the following two
+options:
+
+  - ``--stage``
+  - ``--stop-stage``
+
+to control which stage(s) should be run. By default, all stages are executed.
+
+
+For example,
+
+.. code-block:: bash
+
+  $ cd egs/librispeech/ASR
+  $ ./prepare.sh --stage 0 --stop-stage 0
+
+means to run only stage 0.
+
+To run stage 2 to stage 5, use:
+
+.. code-block:: bash
+
+  $ ./prepare.sh --stage 2 --stop-stage 5
+
+.. hint::
+
+  If you have pre-downloaded the `LibriSpeech <https://www.openslr.org/12>`_
+  dataset and the `musan <http://www.openslr.org/17/>`_ dataset, say,
+  they are saved in ``/tmp/LibriSpeech`` and ``/tmp/musan``, you can modify
+  the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
+  ``./prepare.sh`` won't re-download them.
+
+.. note::
+
+  All files generated by ``./prepare.sh``, e.g., features, lexicon, etc.,
+  are saved in the ``./data`` directory.
+
+We provide the following YouTube video showing how to run ``./prepare.sh``.
+
+.. note::
+
+   To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe to
+   the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
+
+      `<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
+
+..  youtube:: ofEIoJL-mGM
+
+Training
+--------
+
+Configurable options
+~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+  $ cd egs/librispeech/ASR
+  $ ./lstm_transducer_stateless2/train.py --help
+
+shows you the training options that can be passed from the command line.
+The following options are used quite often:
+
+  - ``--full-libri``
+
+    If it's True, the training part uses all the training data, i.e.,
+    960 hours. Otherwise, the training part uses only the subset
+    ``train-clean-100``, which has 100 hours of training data.
+
+    .. CAUTION::
+
+      The training set is perturbed by speed with two factors: 0.9 and 1.1.
+      If ``--full-libri`` is True, each epoch actually processes
+      ``3x960 == 2880`` hours of data.
+
+  - ``--num-epochs``
+
+    It is the number of epochs to train. For instance,
+    ``./lstm_transducer_stateless2/train.py --num-epochs 30`` trains for 30 epochs
+    and generates ``epoch-1.pt``, ``epoch-2.pt``, ..., ``epoch-30.pt``
+    in the folder ``./lstm_transducer_stateless2/exp``.
+
+  - ``--start-epoch``
+
+    It's used to resume training.
+    ``./lstm_transducer_stateless2/train.py --start-epoch 10`` loads the
+    checkpoint ``./lstm_transducer_stateless2/exp/epoch-9.pt`` and starts
+    training from epoch 10, based on the state from epoch 9.
+
+  - ``--world-size``
+
+    It is used for multi-GPU single-machine DDP training.
+
+      - (a) If it is 1, then no DDP training is used.
+
+      - (b) If it is 2, then GPU 0 and GPU 1 are used for DDP training.
+
+    The following shows some use cases with it.
+
+      **Use case 1**: You have 4 GPUs, but you only want to use GPU 0 and
+      GPU 2 for training. You can do the following:
+
+        .. code-block:: bash
+
+          $ cd egs/librispeech/ASR
+          $ export CUDA_VISIBLE_DEVICES="0,2"
+          $ ./lstm_transducer_stateless2/train.py --world-size 2
+
+      **Use case 2**: You have 4 GPUs and you want to use all of them
+      for training. You can do the following:
+
+        .. code-block:: bash
+
+          $ cd egs/librispeech/ASR
+          $ ./lstm_transducer_stateless2/train.py --world-size 4
+
+      **Use case 3**: You have 4 GPUs but you only want to use GPU 3
+      for training. You can do the following:
+
+        .. code-block:: bash
+
+          $ cd egs/librispeech/ASR
+          $ export CUDA_VISIBLE_DEVICES="3"
+          $ ./lstm_transducer_stateless2/train.py --world-size 1
+
+    .. caution::
+
+      Only multi-GPU single-machine DDP training is implemented at present.
+      Multi-GPU multi-machine DDP training will be added later.
+
+  - ``--max-duration``
+
+    It specifies the number of seconds over all utterances in a
+    batch, before **padding**.
+    If you encounter CUDA OOM, please reduce it.
+
+    .. HINT::
+
+      Due to padding, the number of seconds of all utterances in a
+      batch will usually be larger than ``--max-duration``.
+
+      A larger value for ``--max-duration`` may cause OOM during training,
+      while a smaller value may increase the training time. You have to
+      tune it.
+
+  - ``--giga-prob``
+
+    The probability to select a batch from the ``GigaSpeech`` dataset.
+    Note: It is available only for ``(2)``.
+
+Pre-configured options
+~~~~~~~~~~~~~~~~~~~~~~
+
+There are some training options, e.g., weight decay,
+number of warmup steps, results dir, etc,
+that are not passed from the commandline.
+They are pre-configured by the function ``get_params()`` in
+`lstm_transducer_stateless2/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/lstm_transducer_stateless2/train.py>`_
+
+You don't need to change these pre-configured parameters. If you really need to change
+them, please modify ``./lstm_transducer_stateless2/train.py`` directly.
+
+Training logs
+~~~~~~~~~~~~~
+
+Training logs and checkpoints are saved in ``lstm_transducer_stateless2/exp``.
+You will find the following files in that directory:
+
+  - ``epoch-1.pt``, ``epoch-2.pt``, ...
+
+    These are checkpoint files saved at the end of each epoch, containing model
+    ``state_dict`` and optimizer ``state_dict``.
+    To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
+
+      .. code-block:: bash
+
+        $ ./lstm_transducer_stateless2/train.py --start-epoch 11
+
+  - ``checkpoint-436000.pt``, ``checkpoint-438000.pt``, ...
+
+    These are checkpoint files saved every ``--save-every-n`` batches,
+    containing model ``state_dict`` and optimizer ``state_dict``.
+    To resume training from some checkpoint, say ``checkpoint-436000``, you can use:
+
+      .. code-block:: bash
+
+        $ ./lstm_transducer_stateless2/train.py --start-batch 436000
+
+  - ``tensorboard/``
+
+    This folder contains TensorBoard logs. Training loss, validation loss, learning
+    rate, etc, are recorded in these logs. You can visualize them by:
+
+      .. code-block:: bash
+
+        $ cd lstm_transducer_stateless2/exp/tensorboard
+        $ tensorboard dev upload --logdir . --description "LSTM transducer training for LibriSpeech with icefall"
+
+    It will print something like below:
+
+      .. code-block::
+
+        TensorFlow installation not found - running with reduced feature set.
+        Upload started and will continue reading any new data as it's added to the logdir.
+
+        To stop uploading, press Ctrl-C.
+
+        New experiment created. View your TensorBoard at: https://tensorboard.dev/experiment/cj2vtPiwQHKN9Q1tx6PTpg/
+
+        [2022-09-20T15:50:50] Started scanning logdir.
+        Uploading 4468 scalars...
+        [2022-09-20T15:53:02] Total uploaded: 210171 scalars, 0 tensors, 0 binary objects
+        Listening for new data in logdir...
+
+    Note there is a URL in the above output. Click it and you will see
+    the following screenshot:
+
+      .. figure:: images/librispeech-lstm-transducer-tensorboard-log.png
+         :width: 600
+         :alt: TensorBoard screenshot
+         :align: center
+         :target: https://tensorboard.dev/experiment/lzGnETjwRxC3yghNMd4kPw/
+
+         TensorBoard screenshot.
+
+  .. hint::
+
+    If you don't have access to google, you can use the following command
+    to view the tensorboard log locally:
+
+      .. code-block:: bash
+
+        cd lstm_transducer_stateless2/exp/tensorboard
+        tensorboard --logdir . --port 6008
+
+    It will print the following message:
+
+      .. code-block::
+
+        Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
+        TensorBoard 2.8.0 at http://localhost:6008/ (Press CTRL+C to quit)
+
+    Now start your browser and go to `<http://localhost:6008>`_ to view the tensorboard
+    logs.
+
+
+  - ``log/log-train-xxxx``
+
+    It is the detailed training log in text format, same as the one
+    you saw printed to the console during training.
+
+Usage example
+~~~~~~~~~~~~~
+
+You can use the following command to start the training using 8 GPUs:
+
+.. code-block:: bash
+
+  export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+  ./lstm_transducer_stateless2/train.py \
+    --world-size 8 \
+    --num-epochs 35 \
+    --start-epoch 1 \
+    --full-libri 1 \
+    --exp-dir lstm_transducer_stateless2/exp \
+    --max-duration 500 \
+    --use-fp16 0 \
+    --lr-epochs 10 \
+    --num-workers 2 \
+    --giga-prob 0.9
+
+Decoding
+--------
+
+The decoding part uses checkpoints saved by the training part, so you have
+to run the training part first.
+
+.. hint::
+
+   There are two kinds of checkpoints:
+
+    - (1) ``epoch-1.pt``, ``epoch-2.pt``, ..., which are saved at the end
+      of each epoch. You can pass ``--epoch`` to
+      ``lstm_transducer_stateless2/decode.py`` to use them.
+
+    - (2) ``checkpoint-436000.pt``, ``checkpoint-438000.pt``, ..., which are saved
+      every ``--save-every-n`` batches. You can pass ``--iter`` to
+      ``lstm_transducer_stateless2/decode.py`` to use them.
+
+    We suggest that you try both types of checkpoints and choose the one
+    that produces the lowest WERs.
+
+.. code-block:: bash
+
+  $ cd egs/librispeech/ASR
+  $ ./lstm_transducer_stateless2/decode.py --help
+
+shows the options for decoding.
+
+The following shows two examples:
+
+.. code-block:: bash
+
+  for m in greedy_search fast_beam_search modified_beam_search; do
+    for epoch in 17; do
+      for avg in 1 2; do
+        ./lstm_transducer_stateless2/decode.py \
+          --epoch $epoch \
+          --avg $avg \
+          --exp-dir lstm_transducer_stateless2/exp \
+          --max-duration 600 \
+          --num-encoder-layers 12 \
+          --rnn-hidden-size 1024 \
+          --decoding-method $m \
+          --use-averaged-model True \
+          --beam 4 \
+          --max-contexts 4 \
+          --max-states 8 \
+          --beam-size 4
+      done
+    done
+  done
+
+
+.. code-block:: bash
+
+  for m in greedy_search fast_beam_search modified_beam_search; do
+    for iter in 474000; do
+      for avg in 8 10 12 14 16 18; do
+        ./lstm_transducer_stateless2/decode.py \
+          --iter $iter \
+          --avg $avg \
+          --exp-dir lstm_transducer_stateless2/exp \
+          --max-duration 600 \
+          --num-encoder-layers 12 \
+          --rnn-hidden-size 1024 \
+          --decoding-method $m \
+          --use-averaged-model True \
+          --beam 4 \
+          --max-contexts 4 \
+          --max-states 8 \
+          --beam-size 4
+      done
+    done
+  done
+
+Export models
+-------------
+
+`lstm_transducer_stateless2/export.py <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/lstm_transducer_stateless2/export.py>`_ supports exporting checkpoints from ``lstm_transducer_stateless2/exp`` in the following ways.
+
+Export ``model.state_dict()``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Checkpoints saved by ``lstm_transducer_stateless2/train.py`` also include
+``optimizer.state_dict()``. It is useful for resuming training. But after training,
+we are interested only in ``model.state_dict()``. You can use the following
+command to extract ``model.state_dict()``.
+
+.. code-block:: bash
+
+  # Assume that --iter 468000 --avg 16 produces the smallest WER
+  # (You can get such information after running ./lstm_transducer_stateless2/decode.py)
+
+  iter=468000
+  avg=16
+
+  ./lstm_transducer_stateless2/export.py \
+    --exp-dir ./lstm_transducer_stateless2/exp \
+    --bpe-model data/lang_bpe_500/bpe.model \
+    --iter $iter \
+    --avg  $avg
+
+It will generate a file ``./lstm_transducer_stateless2/exp/pretrained.pt``.
+
+.. hint::
+
+   To use the generated ``pretrained.pt`` for ``lstm_transducer_stateless2/decode.py``,
+   you can run:
+
+   .. code-block:: bash
+
+      cd lstm_transducer_stateless2/exp
+      ln -s pretrained.pt epoch-9999.pt
+
+   And then pass ``--epoch 9999 --avg 1 --use-averaged-model 0`` to
+   ``./lstm_transducer_stateless2/decode.py``.
+
+To use the exported model with ``./lstm_transducer_stateless2/pretrained.py``, you
+can run:
+
+.. code-block:: bash
+
+  ./lstm_transducer_stateless2/pretrained.py \
+    --checkpoint ./lstm_transducer_stateless2/exp/pretrained.pt \
+    --bpe-model ./data/lang_bpe_500/bpe.model \
+    --method greedy_search \
+    /path/to/foo.wav \
+    /path/to/bar.wav
+
+Export model using ``torch.jit.trace()``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: bash
+
+  iter=468000
+  avg=16
+
+  ./lstm_transducer_stateless2/export.py \
+    --exp-dir ./lstm_transducer_stateless2/exp \
+    --bpe-model data/lang_bpe_500/bpe.model \
+    --iter $iter \
+    --avg  $avg \
+    --jit-trace 1
+
+It will generate 3 files:
+
+  - ``./lstm_transducer_stateless2/exp/encoder_jit_trace.pt``
+  - ``./lstm_transducer_stateless2/exp/decoder_jit_trace.pt``
+  - ``./lstm_transducer_stateless2/exp/joiner_jit_trace.pt``
+
+To use the generated files with ``./lstm_transducer_stateless2/jit_pretrained.py``:
+
+.. code-block:: bash
+
+  ./lstm_transducer_stateless2/jit_pretrained.py \
+    --bpe-model ./data/lang_bpe_500/bpe.model \
+    --encoder-model-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace.pt \
+    --decoder-model-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace.pt \
+    --joiner-model-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace.pt \
+    /path/to/foo.wav \
+    /path/to/bar.wav
+
+Export model for ncnn
+~~~~~~~~~~~~~~~~~~~~~
+
+We support exporting pretrained LSTM transducer models to
+`ncnn <https://github.com/tencent/ncnn>`_ using
+`pnnx <https://github.com/Tencent/ncnn/tree/master/tools/pnnx>`_.
+
+First, let us install a modified version of ``ncnn``:
+
+.. code-block:: bash
+
+  git clone https://github.com/csukuangfj/ncnn
+  cd ncnn
+  git submodule update --recursive --init
+  python3 setup.py bdist_wheel
+  ls -lh dist/
+  pip install ./dist/*.whl
+
+  # now build pnnx
+  cd tools/pnnx
+  mkdir build && cd build
+  cmake ..
+  make -j4
+  export PATH=$PWD/src:$PATH
+
+  ./src/pnnx
+
+.. note::
+
+   We assume that you have added the path to the binary ``pnnx`` to the
+   environment variable ``PATH``.
+
+Second, let us export the model using ``torch.jit.trace()`` that is suitable
+for ``pnnx``:
+
+.. code-block:: bash
+
+  iter=468000
+  avg=16
+
+  ./lstm_transducer_stateless2/export.py \
+    --exp-dir ./lstm_transducer_stateless2/exp \
+    --bpe-model data/lang_bpe_500/bpe.model \
+    --iter $iter \
+    --avg  $avg \
+    --pnnx 1
+
+It will generate 3 files:
+
+  - ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt``
+  - ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt``
+  - ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt``
+
+Third, convert torchscript model to ``ncnn`` format:
+
+.. code-block::
+
+   pnnx ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt
+   pnnx ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt
+   pnnx ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt
+
+It will generate the following files:
+
+  - ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param``
+  - ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin``
+  - ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param``
+  - ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin``
+  - ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param``
+  - ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin``
+
+To use the above generated files, run:
+
+.. code-block:: bash
+
+  ./lstm_transducer_stateless2/ncnn-decode.py \
+    --bpe-model-filename ./data/lang_bpe_500/bpe.model \
+    --encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param \
+    --encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin \
+    --decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param \
+    --decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin \
+    --joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param \
+    --joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin \
+    /path/to/foo.wav
+
+.. code-block:: bash
+
+  ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
+    --bpe-model-filename ./data/lang_bpe_500/bpe.model \
+    --encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param \
+    --encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin \
+    --decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param \
+    --decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin \
+    --joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param \
+    --joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin \
+    /path/to/foo.wav
+
+To use the above generated files in C++, please see
+`<https://github.com/k2-fsa/sherpa-ncnn>`_
+
+It is able to generate a statically linked library that can be run on Linux, Windows,
+macOS, Raspberry Pi, etc.
+
+Download pretrained models
+--------------------------
+
+If you don't want to train from scratch, you can download the pretrained models
+by visiting the following links:
+
+  - `<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
+
+  - `<https://huggingface.co/Zengwei/icefall-asr-librispeech-lstm-transducer-stateless-2022-08-18>`_
+
+  See `<https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md>`_
+  for the details of the above pretrained models
+
+You can find more usages of the pretrained models in
+`<https://k2-fsa.github.io/sherpa/python/streaming_asr/lstm/index.html>`_
diff --git a/contributing/code-style.html b/contributing/code-style.html
index 5dfd320b0..86f4cb1a0 100644
--- a/contributing/code-style.html
+++ b/contributing/code-style.html
@@ -108,7 +108,7 @@ $ pre-commit install
 <div><figure class="align-center" id="id2">
 <a class="reference internal image-reference" href="../_images/pre-commit-check.png"><img alt="../_images/pre-commit-check.png" src="../_images/pre-commit-check.png" style="width: 600px;" /></a>
 <figcaption>
-<p><span class="caption-number">Fig. 7 </span><span class="caption-text">pre-commit hooks invoked by <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> (Failed).</span><a class="headerlink" href="#id2" title="Permalink to this image"></a></p>
+<p><span class="caption-number">Fig. 8 </span><span class="caption-text">pre-commit hooks invoked by <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> (Failed).</span><a class="headerlink" href="#id2" title="Permalink to this image"></a></p>
 </figcaption>
 </figure>
 </div></blockquote>
@@ -127,7 +127,7 @@ it should succeed this time:</p>
 <div><figure class="align-center" id="id3">
 <a class="reference internal image-reference" href="../_images/pre-commit-check-success.png"><img alt="../_images/pre-commit-check-success.png" src="../_images/pre-commit-check-success.png" style="width: 600px;" /></a>
 <figcaption>
-<p><span class="caption-number">Fig. 8 </span><span class="caption-text">pre-commit hooks invoked by <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> (Succeeded).</span><a class="headerlink" href="#id3" title="Permalink to this image"></a></p>
+<p><span class="caption-number">Fig. 9 </span><span class="caption-text">pre-commit hooks invoked by <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> (Succeeded).</span><a class="headerlink" href="#id3" title="Permalink to this image"></a></p>
 </figcaption>
 </figure>
 </div></blockquote>
diff --git a/contributing/doc.html b/contributing/doc.html
index a70c4ab8e..9c9fba7f2 100644
--- a/contributing/doc.html
+++ b/contributing/doc.html
@@ -116,7 +116,7 @@ the following:</p>
 <div><figure class="align-center" id="id1">
 <a class="reference internal image-reference" href="../_images/doc-contrib.png"><img alt="../_images/doc-contrib.png" src="../_images/doc-contrib.png" style="width: 600px;" /></a>
 <figcaption>
-<p><span class="caption-number">Fig. 6 </span><span class="caption-text">View generated documentation locally with <code class="docutils literal notranslate"><span class="pre">python3</span> <span class="pre">-m</span> <span class="pre">http.server</span></code>.</span><a class="headerlink" href="#id1" title="Permalink to this image"></a></p>
+<p><span class="caption-number">Fig. 7 </span><span class="caption-text">View generated documentation locally with <code class="docutils literal notranslate"><span class="pre">python3</span> <span class="pre">-m</span> <span class="pre">http.server</span></code>.</span><a class="headerlink" href="#id1" title="Permalink to this image"></a></p>
 </figcaption>
 </figure>
 </div></blockquote>
diff --git a/objects.inv b/objects.inv
index 4dc5512ac..b0de8a293 100644
Binary files a/objects.inv and b/objects.inv differ
diff --git a/recipes/index.html b/recipes/index.html
index 3d873b7fe..8d6fd0780 100644
--- a/recipes/index.html
+++ b/recipes/index.html
@@ -93,6 +93,7 @@ Currently, only speech recognition recipes are provided.</p>
 <li class="toctree-l1"><a class="reference internal" href="librispeech/index.html">LibriSpeech</a><ul>
 <li class="toctree-l2"><a class="reference internal" href="librispeech/tdnn_lstm_ctc.html">TDNN-LSTM-CTC</a></li>
 <li class="toctree-l2"><a class="reference internal" href="librispeech/conformer_ctc.html">Conformer CTC</a></li>
+<li class="toctree-l2"><a class="reference internal" href="librispeech/lstm_pruned_stateless_transducer.html">Transducer</a></li>
 </ul>
 </li>
 <li class="toctree-l1"><a class="reference internal" href="timit/index.html">TIMIT</a><ul>
diff --git a/recipes/librispeech/conformer_ctc.html b/recipes/librispeech/conformer_ctc.html
index 1b5b0d512..b3dc3df78 100644
--- a/recipes/librispeech/conformer_ctc.html
+++ b/recipes/librispeech/conformer_ctc.html
@@ -19,7 +19,7 @@
     <script src="../../_static/js/theme.js"></script>
     <link rel="index" title="Index" href="../../genindex.html" />
     <link rel="search" title="Search" href="../../search.html" />
-    <link rel="next" title="TIMIT" href="../timit/index.html" />
+    <link rel="next" title="Transducer" href="lstm_pruned_stateless_transducer.html" />
     <link rel="prev" title="TDNN-LSTM-CTC" href="tdnn_lstm_ctc.html" /> 
 </head>
 
@@ -54,6 +54,7 @@
 <li class="toctree-l4"><a class="reference internal" href="#deployment-with-c">Deployment with C++</a></li>
 </ul>
 </li>
+<li class="toctree-l3"><a class="reference internal" href="lstm_pruned_stateless_transducer.html">Transducer</a></li>
 </ul>
 </li>
 <li class="toctree-l2"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
@@ -1086,7 +1087,7 @@ Please see <a class="reference external" href="https://colab.research.google.com
           </div>
           <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
         <a href="tdnn_lstm_ctc.html" class="btn btn-neutral float-left" title="TDNN-LSTM-CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
-        <a href="../timit/index.html" class="btn btn-neutral float-right" title="TIMIT" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
+        <a href="lstm_pruned_stateless_transducer.html" class="btn btn-neutral float-right" title="Transducer" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
     </div>
 
   <hr/>
diff --git a/recipes/librispeech/index.html b/recipes/librispeech/index.html
index 5ef6d9104..1e8ffe6a2 100644
--- a/recipes/librispeech/index.html
+++ b/recipes/librispeech/index.html
@@ -46,6 +46,7 @@
 <li class="toctree-l2 current"><a class="current reference internal" href="#">LibriSpeech</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM-CTC</a></li>
 <li class="toctree-l3"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
+<li class="toctree-l3"><a class="reference internal" href="lstm_pruned_stateless_transducer.html">Transducer</a></li>
 </ul>
 </li>
 <li class="toctree-l2"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
@@ -87,6 +88,7 @@
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM-CTC</a></li>
 <li class="toctree-l1"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
+<li class="toctree-l1"><a class="reference internal" href="lstm_pruned_stateless_transducer.html">Transducer</a></li>
 </ul>
 </div>
 </section>
diff --git a/recipes/librispeech/lstm_pruned_stateless_transducer.html b/recipes/librispeech/lstm_pruned_stateless_transducer.html
new file mode 100644
index 000000000..0f2bb88a6
--- /dev/null
+++ b/recipes/librispeech/lstm_pruned_stateless_transducer.html
@@ -0,0 +1,711 @@
+<!DOCTYPE html>
+<html class="writer-html5" lang="en" >
+<head>
+  <meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
+
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transducer &mdash; icefall 0.1 documentation</title>
+      <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
+      <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
+  <!--[if lt IE 9]>
+    <script src="../../_static/js/html5shiv.min.js"></script>
+  <![endif]-->
+  
+        <script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
+        <script src="../../_static/jquery.js"></script>
+        <script src="../../_static/underscore.js"></script>
+        <script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
+        <script src="../../_static/doctools.js"></script>
+    <script src="../../_static/js/theme.js"></script>
+    <link rel="index" title="Index" href="../../genindex.html" />
+    <link rel="search" title="Search" href="../../search.html" />
+    <link rel="next" title="TIMIT" href="../timit/index.html" />
+    <link rel="prev" title="Conformer CTC" href="conformer_ctc.html" /> 
+</head>
+
+<body class="wy-body-for-nav"> 
+  <div class="wy-grid-for-nav">
+    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
+      <div class="wy-side-scroll">
+        <div class="wy-side-nav-search" >
+            <a href="../../index.html" class="icon icon-home"> icefall
+          </a>
+<div role="search">
+  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
+    <input type="text" name="q" placeholder="Search docs" />
+    <input type="hidden" name="check_keywords" value="yes" />
+    <input type="hidden" name="area" value="default" />
+  </form>
+</div>
+        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
+              <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
+<ul class="current">
+<li class="toctree-l1"><a class="reference internal" href="../../installation/index.html">Installation</a></li>
+<li class="toctree-l1 current"><a class="reference internal" href="../index.html">Recipes</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" href="../aishell/index.html">aishell</a></li>
+<li class="toctree-l2 current"><a class="reference internal" href="index.html">LibriSpeech</a><ul class="current">
+<li class="toctree-l3"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM-CTC</a></li>
+<li class="toctree-l3"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
+<li class="toctree-l3 current"><a class="current reference internal" href="#">Transducer</a><ul>
+<li class="toctree-l4"><a class="reference internal" href="#which-model-to-use">Which model to use</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#data-preparation">Data preparation</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#training">Training</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#decoding">Decoding</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#export-models">Export models</a></li>
+<li class="toctree-l4"><a class="reference internal" href="#download-pretrained-models">Download pretrained models</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li class="toctree-l2"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
+<li class="toctree-l2"><a class="reference internal" href="../yesno/index.html">YesNo</a></li>
+</ul>
+</li>
+<li class="toctree-l1"><a class="reference internal" href="../../contributing/index.html">Contributing</a></li>
+<li class="toctree-l1"><a class="reference internal" href="../../huggingface/index.html">Huggingface</a></li>
+</ul>
+
+        </div>
+      </div>
+    </nav>
+
+    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
+          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
+          <a href="../../index.html">icefall</a>
+      </nav>
+
+      <div class="wy-nav-content">
+        <div class="rst-content">
+          <div role="navigation" aria-label="Page navigation">
+  <ul class="wy-breadcrumbs">
+      <li><a href="../../index.html" class="icon icon-home"></a> &raquo;</li>
+          <li><a href="../index.html">Recipes</a> &raquo;</li>
+          <li><a href="index.html">LibriSpeech</a> &raquo;</li>
+      <li>Transducer</li>
+      <li class="wy-breadcrumbs-aside">
+              <a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/recipes/librispeech/lstm_pruned_stateless_transducer.rst" class="fa fa-github"> Edit on GitHub</a>
+      </li>
+  </ul>
+  <hr/>
+</div>
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+             
+  <section id="transducer">
+<h1>Transducer<a class="headerlink" href="#transducer" title="Permalink to this heading"></a></h1>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>Please scroll down to the bottom of this page to find download links
+for pretrained models if you don’t want to train a model from scratch.</p>
+</div>
+<p>This tutorial shows you how to train a transducer model
+with the <a class="reference external" href="https://www.openslr.org/12">LibriSpeech</a> dataset.</p>
+<p>We use pruned RNN-T to compute the loss.</p>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>You can find the paper about pruned RNN-T at the following address:</p>
+<p><a class="reference external" href="https://arxiv.org/abs/2206.13236">https://arxiv.org/abs/2206.13236</a></p>
+</div>
+<p>The transducer model consists of 3 parts:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p>Encoder, a.k.a, transcriber. We use an LSTM model</p></li>
+<li><p>Decoder, a.k.a, predictor. We use a model consisting of <code class="docutils literal notranslate"><span class="pre">nn.Embedding</span></code>
+and <code class="docutils literal notranslate"><span class="pre">nn.Conv1d</span></code></p></li>
+<li><p>Joiner, a.k.a, the joint network.</p></li>
+</ul>
+</div></blockquote>
+<div class="admonition caution">
+<p class="admonition-title">Caution</p>
+<p>Contrary to the conventional RNN-T models, we use a stateless decoder.
+That is, it has no recurrent connections.</p>
+</div>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>Since the encoder model is an LSTM, not Transformer/Conformer, the
+resulting model is suitable for streaming/online ASR.</p>
+</div>
+<section id="which-model-to-use">
+<h2>Which model to use<a class="headerlink" href="#which-model-to-use" title="Permalink to this heading"></a></h2>
+<p>Currently, there are two folders about LSTM stateless transducer training:</p>
+<blockquote>
+<div><ul>
+<li><p><code class="docutils literal notranslate"><span class="pre">(1)</span></code> <a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless">https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless</a></p>
+<p>This recipe uses only LibriSpeech during training.</p>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">(2)</span></code> <a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2">https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2</a></p>
+<p>This recipe uses GigaSpeech + LibriSpeech during training.</p>
+</li>
+</ul>
+</div></blockquote>
+<p><code class="docutils literal notranslate"><span class="pre">(1)</span></code> and <code class="docutils literal notranslate"><span class="pre">(2)</span></code> use the same model architecture. The only difference is that <code class="docutils literal notranslate"><span class="pre">(2)</span></code> supports
+multi-dataset. Since <code class="docutils literal notranslate"><span class="pre">(2)</span></code> uses more data, it has a lower WER than <code class="docutils literal notranslate"><span class="pre">(1)</span></code> but it needs
+more training time.</p>
+<p>We use <code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2</span></code> as an example below.</p>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>You need to download the <a class="reference external" href="https://github.com/SpeechColab/GigaSpeech">GigaSpeech</a> dataset
+to run <code class="docutils literal notranslate"><span class="pre">(2)</span></code>. If you have only the <code class="docutils literal notranslate"><span class="pre">LibriSpeech</span></code> dataset available, feel free to use <code class="docutils literal notranslate"><span class="pre">(1)</span></code>.</p>
+</div>
+</section>
+<section id="data-preparation">
+<h2>Data preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h2>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ ./prepare.sh
+
+<span class="c1"># If you use (1), you can **skip** the following command</span>
+$ ./prepare_giga_speech.sh
+</pre></div>
+</div>
+<p>The script <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> handles the data preparation for you, <strong>automagically</strong>.
+All you need to do is to run it.</p>
+<p>The data preparation contains several stages. You can use the following two
+options:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><code class="docutils literal notranslate"><span class="pre">--stage</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--stop-stage</span></code></p></li>
+</ul>
+</div></blockquote>
+<p>to control which stage(s) should be run. By default, all stages are executed.</p>
+<p>For example,</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ ./prepare.sh --stage <span class="m">0</span> --stop-stage <span class="m">0</span>
+</pre></div>
+</div>
+<p>means to run only stage 0.</p>
+<p>To run stage 2 to stage 5, use:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ ./prepare.sh --stage <span class="m">2</span> --stop-stage <span class="m">5</span>
+</pre></div>
+</div>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>If you have pre-downloaded the <a class="reference external" href="https://www.openslr.org/12">LibriSpeech</a>
+dataset and the <a class="reference external" href="http://www.openslr.org/17/">musan</a> dataset, say,
+they are saved in <code class="docutils literal notranslate"><span class="pre">/tmp/LibriSpeech</span></code> and <code class="docutils literal notranslate"><span class="pre">/tmp/musan</span></code>, you can modify
+the <code class="docutils literal notranslate"><span class="pre">dl_dir</span></code> variable in <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> to point to <code class="docutils literal notranslate"><span class="pre">/tmp</span></code> so that
+<code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> won’t re-download them.</p>
+</div>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>All files generated by <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code>, e.g., features, lexicon, etc,
+are saved in <code class="docutils literal notranslate"><span class="pre">./data</span></code> directory.</p>
+</div>
+<p>We provide the following YouTube video showing how to run <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code>.</p>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>To get the latest news of <a class="reference external" href="https://github.com/k2-fsa">next-gen Kaldi</a>, please subscribe to
+the following YouTube channel by <a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">Nadira Povey</a>:</p>
+<blockquote>
+<div><p><a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw</a></p>
+</div></blockquote>
+</div>
+<div class="video_wrapper" style="">
+<iframe allowfullscreen="true" src="https://www.youtube.com/embed/ofEIoJL-mGM" style="border: 0; height: 345px; width: 560px">
+</iframe></div></section>
+<section id="training">
+<h2>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h2>
+<section id="configurable-options">
+<h3>Configurable options<a class="headerlink" href="#configurable-options" title="Permalink to this heading"></a></h3>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ ./lstm_transducer_stateless2/train.py --help
+</pre></div>
+</div>
+<p>shows you the training options that can be passed from the commandline.
+The following options are used quite often:</p>
+<blockquote>
+<div><ul>
+<li><p><code class="docutils literal notranslate"><span class="pre">--full-libri</span></code></p>
+<p>If it’s True, the training part uses all the training data, i.e.,
+960 hours. Otherwise, the training part uses only the subset
+<code class="docutils literal notranslate"><span class="pre">train-clean-100</span></code>, which has 100 hours of training data.</p>
+<div class="admonition caution">
+<p class="admonition-title">Caution</p>
+<p>The training set is perturbed by speed with two factors: 0.9 and 1.1.
+If <code class="docutils literal notranslate"><span class="pre">--full-libri</span></code> is True, each epoch actually processes
+<code class="docutils literal notranslate"><span class="pre">3x960</span> <span class="pre">==</span> <span class="pre">2880</span></code> hours of data.</p>
+</div>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--num-epochs</span></code></p>
+<p>It is the number of epochs to train. For instance,
+<code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/train.py</span> <span class="pre">--num-epochs</span> <span class="pre">30</span></code> trains for 30 epochs
+and generates <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-2.pt</span></code>, …, <code class="docutils literal notranslate"><span class="pre">epoch-30.pt</span></code>
+in the folder <code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp</span></code>.</p>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--start-epoch</span></code></p>
+<p>It’s used to resume training.
+<code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/train.py</span> <span class="pre">--start-epoch</span> <span class="pre">10</span></code> loads the
+checkpoint <code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/epoch-9.pt</span></code> and starts
+training from epoch 10, based on the state from epoch 9.</p>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--world-size</span></code></p>
+<p>It is used for multi-GPU single-machine DDP training.</p>
+<blockquote>
+<div><ul class="simple">
+<li><ol class="loweralpha simple">
+<li><p>If it is 1, then no DDP training is used.</p></li>
+</ol>
+</li>
+<li><ol class="loweralpha simple" start="2">
+<li><p>If it is 2, then GPU 0 and GPU 1 are used for DDP training.</p></li>
+</ol>
+</li>
+</ul>
+</div></blockquote>
+<p>The following shows some use cases with it.</p>
+<blockquote>
+<div><p><strong>Use case 1</strong>: You have 4 GPUs, but you only want to use GPU 0 and
+GPU 2 for training. You can do the following:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ <span class="nb">export</span> <span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">&quot;0,2&quot;</span>
+$ ./lstm_transducer_stateless2/train.py --world-size <span class="m">2</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Use case 2</strong>: You have 4 GPUs and you want to use all of them
+for training. You can do the following:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ ./lstm_transducer_stateless2/train.py --world-size <span class="m">4</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p><strong>Use case 3</strong>: You have 4 GPUs but you only want to use GPU 3
+for training. You can do the following:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ <span class="nb">export</span> <span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">&quot;3&quot;</span>
+$ ./lstm_transducer_stateless2/train.py --world-size <span class="m">1</span>
+</pre></div>
+</div>
+</div></blockquote>
+</div></blockquote>
+<div class="admonition caution">
+<p class="admonition-title">Caution</p>
+<p>Only multi-GPU single-machine DDP training is implemented at present.
+Multi-GPU multi-machine DDP training will be added later.</p>
+</div>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
+<p>It specifies the number of seconds over all utterances in a
+batch, before <strong>padding</strong>.
+If you encounter CUDA OOM, please reduce it.</p>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>Due to padding, the number of seconds of all utterances in a
+batch will usually be larger than <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code>.</p>
+<p>A larger value for <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code> may cause OOM during training,
+while a smaller value may increase the training time. You have to
+tune it.</p>
+</div>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">--giga-prob</span></code></p>
+<p>The probability to select a batch from the <code class="docutils literal notranslate"><span class="pre">GigaSpeech</span></code> dataset.
+Note: It is available only for <code class="docutils literal notranslate"><span class="pre">(2)</span></code>.</p>
+</li>
+</ul>
+</div></blockquote>
+</section>
+<section id="pre-configured-options">
+<h3>Pre-configured options<a class="headerlink" href="#pre-configured-options" title="Permalink to this heading"></a></h3>
+<p>There are some training options, e.g., weight decay,
+number of warmup steps, results dir, etc,
+that are not passed from the commandline.
+They are pre-configured by the function <code class="docutils literal notranslate"><span class="pre">get_params()</span></code> in
+<a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/lstm_transducer_stateless2/train.py">lstm_transducer_stateless2/train.py</a></p>
+<p>You don’t need to change these pre-configured parameters. If you really need to change
+them, please modify <code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/train.py</span></code> directly.</p>
+</section>
+<section id="training-logs">
+<h3>Training logs<a class="headerlink" href="#training-logs" title="Permalink to this heading"></a></h3>
+<p>Training logs and checkpoints are saved in <code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2/exp</span></code>.
+You will find the following files in that directory:</p>
+<blockquote>
+<div><ul>
+<li><p><code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-2.pt</span></code>, …</p>
+<p>These are checkpoint files saved at the end of each epoch, containing model
+<code class="docutils literal notranslate"><span class="pre">state_dict</span></code> and optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>.
+To resume training from some checkpoint, say <code class="docutils literal notranslate"><span class="pre">epoch-10.pt</span></code>, you can use:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ ./lstm_transducer_stateless2/train.py --start-epoch <span class="m">11</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">checkpoint-436000.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">checkpoint-438000.pt</span></code>, …</p>
+<p>These are checkpoint files saved every <code class="docutils literal notranslate"><span class="pre">--save-every-n</span></code> batches,
+containing model <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> and optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>.
+To resume training from some checkpoint, say <code class="docutils literal notranslate"><span class="pre">checkpoint-436000</span></code>, you can use:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ ./lstm_transducer_stateless2/train.py --start-batch <span class="m">436000</span>
+</pre></div>
+</div>
+</div></blockquote>
+</li>
+<li><p><code class="docutils literal notranslate"><span class="pre">tensorboard/</span></code></p>
+<p>This folder contains TensorBoard logs. Training loss, validation loss, learning
+rate, etc, are recorded in these logs. You can visualize them by:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> lstm_transducer_stateless2/exp/tensorboard
+$ tensorboard dev upload --logdir . --description <span class="s2">&quot;LSTM transducer training for LibriSpeech with icefall&quot;</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>It will print something like below:</p>
+<blockquote>
+<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">TensorFlow</span> <span class="n">installation</span> <span class="ow">not</span> <span class="n">found</span> <span class="o">-</span> <span class="n">running</span> <span class="k">with</span> <span class="n">reduced</span> <span class="n">feature</span> <span class="nb">set</span><span class="o">.</span>
+<span class="n">Upload</span> <span class="n">started</span> <span class="ow">and</span> <span class="n">will</span> <span class="k">continue</span> <span class="n">reading</span> <span class="nb">any</span> <span class="n">new</span> <span class="n">data</span> <span class="k">as</span> <span class="n">it</span><span class="s1">&#39;s added to the logdir.</span>
+
+<span class="n">To</span> <span class="n">stop</span> <span class="n">uploading</span><span class="p">,</span> <span class="n">press</span> <span class="n">Ctrl</span><span class="o">-</span><span class="n">C</span><span class="o">.</span>
+
+<span class="n">New</span> <span class="n">experiment</span> <span class="n">created</span><span class="o">.</span> <span class="n">View</span> <span class="n">your</span> <span class="n">TensorBoard</span> <span class="n">at</span><span class="p">:</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">tensorboard</span><span class="o">.</span><span class="n">dev</span><span class="o">/</span><span class="n">experiment</span><span class="o">/</span><span class="n">cj2vtPiwQHKN9Q1tx6PTpg</span><span class="o">/</span>
+
+<span class="p">[</span><span class="mi">2022</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">20</span><span class="n">T15</span><span class="p">:</span><span class="mi">50</span><span class="p">:</span><span class="mi">50</span><span class="p">]</span> <span class="n">Started</span> <span class="n">scanning</span> <span class="n">logdir</span><span class="o">.</span>
+<span class="n">Uploading</span> <span class="mi">4468</span> <span class="n">scalars</span><span class="o">...</span>
+<span class="p">[</span><span class="mi">2022</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">20</span><span class="n">T15</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">02</span><span class="p">]</span> <span class="n">Total</span> <span class="n">uploaded</span><span class="p">:</span> <span class="mi">210171</span> <span class="n">scalars</span><span class="p">,</span> <span class="mi">0</span> <span class="n">tensors</span><span class="p">,</span> <span class="mi">0</span> <span class="n">binary</span> <span class="n">objects</span>
+<span class="n">Listening</span> <span class="k">for</span> <span class="n">new</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">logdir</span><span class="o">...</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Note that there is a URL in the above output. Click it and you will see
+the following screenshot:</p>
+<blockquote>
+<div><figure class="align-center" id="id2">
+<a class="reference external image-reference" href="https://tensorboard.dev/experiment/lzGnETjwRxC3yghNMd4kPw/"><img alt="TensorBoard screenshot" src="../../_images/librispeech-lstm-transducer-tensorboard-log.png" style="width: 600px;" /></a>
+<figcaption>
+<p><span class="caption-number">Fig. 5 </span><span class="caption-text">TensorBoard screenshot.</span><a class="headerlink" href="#id2" title="Permalink to this image"></a></p>
+</figcaption>
+</figure>
+</div></blockquote>
+</li>
+</ul>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>If you don’t have access to Google, you can use the following command
+to view the tensorboard log locally:</p>
+<blockquote>
+<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> lstm_transducer_stateless2/exp/tensorboard
+tensorboard --logdir . --port <span class="m">6008</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>It will print the following message:</p>
+<blockquote>
+<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Serving</span> <span class="n">TensorBoard</span> <span class="n">on</span> <span class="n">localhost</span><span class="p">;</span> <span class="n">to</span> <span class="n">expose</span> <span class="n">to</span> <span class="n">the</span> <span class="n">network</span><span class="p">,</span> <span class="n">use</span> <span class="n">a</span> <span class="n">proxy</span> <span class="ow">or</span> <span class="k">pass</span> <span class="o">--</span><span class="n">bind_all</span>
+<span class="n">TensorBoard</span> <span class="mf">2.8.0</span> <span class="n">at</span> <span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="n">localhost</span><span class="p">:</span><span class="mi">6008</span><span class="o">/</span> <span class="p">(</span><span class="n">Press</span> <span class="n">CTRL</span><span class="o">+</span><span class="n">C</span> <span class="n">to</span> <span class="n">quit</span><span class="p">)</span>
+</pre></div>
+</div>
+</div></blockquote>
+<p>Now start your browser and go to <a class="reference external" href="http://localhost:6008">http://localhost:6008</a> to view the tensorboard
+logs.</p>
+</div>
+<ul>
+<li><p><code class="docutils literal notranslate"><span class="pre">log/log-train-xxxx</span></code></p>
+<p>It is the detailed training log in text format, same as the one
+you saw printed to the console during training.</p>
+</li>
+</ul>
+</div></blockquote>
+</section>
+<section id="usage-example">
+<h3>Usage example<a class="headerlink" href="#usage-example" title="Permalink to this heading"></a></h3>
+<p>You can use the following command to start the training using 8 GPUs:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">export</span> <span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">&quot;0,1,2,3,4,5,6,7&quot;</span>
+./lstm_transducer_stateless2/train.py <span class="se">\</span>
+  --world-size <span class="m">8</span> <span class="se">\</span>
+  --num-epochs <span class="m">35</span> <span class="se">\</span>
+  --start-epoch <span class="m">1</span> <span class="se">\</span>
+  --full-libri <span class="m">1</span> <span class="se">\</span>
+  --exp-dir lstm_transducer_stateless2/exp <span class="se">\</span>
+  --max-duration <span class="m">500</span> <span class="se">\</span>
+  --use-fp16 <span class="m">0</span> <span class="se">\</span>
+  --lr-epochs <span class="m">10</span> <span class="se">\</span>
+  --num-workers <span class="m">2</span> <span class="se">\</span>
+  --giga-prob <span class="m">0</span>.9
+</pre></div>
+</div>
+</section>
+</section>
+<section id="decoding">
+<h2>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h2>
+<p>The decoding part uses checkpoints saved by the training part, so you have
+to run the training part first.</p>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>There are two kinds of checkpoints:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p>(1) <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-2.pt</span></code>, …, which are saved at the end
+of each epoch. You can pass <code class="docutils literal notranslate"><span class="pre">--epoch</span></code> to
+<code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2/decode.py</span></code> to use them.</p></li>
+<li><p>(2) <code class="docutils literal notranslate"><span class="pre">checkpoint-436000.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">checkpoint-438000.pt</span></code>, …, which are saved
+every <code class="docutils literal notranslate"><span class="pre">--save-every-n</span></code> batches. You can pass <code class="docutils literal notranslate"><span class="pre">--iter</span></code> to
+<code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2/decode.py</span></code> to use them.</p></li>
+</ul>
+<p>We suggest that you try both types of checkpoints and choose the one
+that produces the lowest WERs.</p>
+</div></blockquote>
+</div>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/librispeech/ASR
+$ ./lstm_transducer_stateless2/decode.py --help
+</pre></div>
+</div>
+<p>shows the options for decoding.</p>
+<p>The following shows two examples:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> m <span class="k">in</span> greedy_search fast_beam_search modified_beam_search<span class="p">;</span> <span class="k">do</span>
+  <span class="k">for</span> epoch <span class="k">in</span> <span class="m">17</span><span class="p">;</span> <span class="k">do</span>
+    <span class="k">for</span> avg <span class="k">in</span> <span class="m">1</span> <span class="m">2</span><span class="p">;</span> <span class="k">do</span>
+      ./lstm_transducer_stateless2/decode.py <span class="se">\</span>
+        --epoch <span class="nv">$epoch</span> <span class="se">\</span>
+        --avg <span class="nv">$avg</span> <span class="se">\</span>
+        --exp-dir lstm_transducer_stateless2/exp <span class="se">\</span>
+        --max-duration <span class="m">600</span> <span class="se">\</span>
+        --num-encoder-layers <span class="m">12</span> <span class="se">\</span>
+        --rnn-hidden-size <span class="m">1024</span> <span class="se">\</span>
+        --decoding-method <span class="nv">$m</span> <span class="se">\</span>
+        --use-averaged-model True <span class="se">\</span>
+        --beam <span class="m">4</span> <span class="se">\</span>
+        --max-contexts <span class="m">4</span> <span class="se">\</span>
+        --max-states <span class="m">8</span> <span class="se">\</span>
+        --beam-size <span class="m">4</span>
+    <span class="k">done</span>
+  <span class="k">done</span>
+<span class="k">done</span>
+</pre></div>
+</div>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="k">for</span> m <span class="k">in</span> greedy_search fast_beam_search modified_beam_search<span class="p">;</span> <span class="k">do</span>
+  <span class="k">for</span> iter <span class="k">in</span> <span class="m">474000</span><span class="p">;</span> <span class="k">do</span>
+    <span class="k">for</span> avg <span class="k">in</span> <span class="m">8</span> <span class="m">10</span> <span class="m">12</span> <span class="m">14</span> <span class="m">16</span> <span class="m">18</span><span class="p">;</span> <span class="k">do</span>
+      ./lstm_transducer_stateless2/decode.py <span class="se">\</span>
+        --iter <span class="nv">$iter</span> <span class="se">\</span>
+        --avg <span class="nv">$avg</span> <span class="se">\</span>
+        --exp-dir lstm_transducer_stateless2/exp <span class="se">\</span>
+        --max-duration <span class="m">600</span> <span class="se">\</span>
+        --num-encoder-layers <span class="m">12</span> <span class="se">\</span>
+        --rnn-hidden-size <span class="m">1024</span> <span class="se">\</span>
+        --decoding-method <span class="nv">$m</span> <span class="se">\</span>
+        --use-averaged-model True <span class="se">\</span>
+        --beam <span class="m">4</span> <span class="se">\</span>
+        --max-contexts <span class="m">4</span> <span class="se">\</span>
+        --max-states <span class="m">8</span> <span class="se">\</span>
+        --beam-size <span class="m">4</span>
+    <span class="k">done</span>
+  <span class="k">done</span>
+<span class="k">done</span>
+</pre></div>
+</div>
+</section>
+<section id="export-models">
+<h2>Export models<a class="headerlink" href="#export-models" title="Permalink to this heading"></a></h2>
+<p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/lstm_transducer_stateless2/export.py">lstm_transducer_stateless2/export.py</a> supports exporting checkpoints from <code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2/exp</span></code> in the following ways.</p>
+<section id="export-model-state-dict">
+<h3>Export <code class="docutils literal notranslate"><span class="pre">model.state_dict()</span></code><a class="headerlink" href="#export-model-state-dict" title="Permalink to this heading"></a></h3>
+<p>Checkpoints saved by <code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2/train.py</span></code> also include
+<code class="docutils literal notranslate"><span class="pre">optimizer.state_dict()</span></code>. It is useful for resuming training. But after training,
+we are interested only in <code class="docutils literal notranslate"><span class="pre">model.state_dict()</span></code>. You can use the following
+command to extract <code class="docutils literal notranslate"><span class="pre">model.state_dict()</span></code>.</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># Assume that --iter 468000 --avg 16 produces the smallest WER</span>
+<span class="c1"># (You can get such information after running ./lstm_transducer_stateless2/decode.py)</span>
+
+<span class="nv">iter</span><span class="o">=</span><span class="m">468000</span>
+<span class="nv">avg</span><span class="o">=</span><span class="m">16</span>
+
+./lstm_transducer_stateless2/export.py <span class="se">\</span>
+  --exp-dir ./lstm_transducer_stateless2/exp <span class="se">\</span>
+  --bpe-model data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --iter <span class="nv">$iter</span> <span class="se">\</span>
+  --avg  <span class="nv">$avg</span>
+</pre></div>
+</div>
+<p>It will generate a file <code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/pretrained.pt</span></code>.</p>
+<div class="admonition hint">
+<p class="admonition-title">Hint</p>
+<p>To use the generated <code class="docutils literal notranslate"><span class="pre">pretrained.pt</span></code> for <code class="docutils literal notranslate"><span class="pre">lstm_transducer_stateless2/decode.py</span></code>,
+you can run:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> lstm_transducer_stateless2/exp
+ln -s pretrained.pt epoch-9999.pt
+</pre></div>
+</div>
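+<p>And then pass <code class="docutils literal notranslate"><span class="pre">--epoch</span> <span class="pre">9999</span> <span class="pre">--avg</span> <span class="pre">1</span> <span class="pre">--use-averaged-model</span> <span class="pre">0</span></code> to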
+<code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/decode.py</span></code>.</p>
+</div>
+<p>To use the exported model with <code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/pretrained.py</span></code>, you
+can run:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./lstm_transducer_stateless2/pretrained.py <span class="se">\</span>
+  --checkpoint ./lstm_transducer_stateless2/exp/pretrained.pt <span class="se">\</span>
+  --bpe-model ./data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --method greedy_search <span class="se">\</span>
+  /path/to/foo.wav <span class="se">\</span>
+  /path/to/bar.wav
+</pre></div>
+</div>
+</section>
+<section id="export-model-using-torch-jit-trace">
+<h3>Export model using <code class="docutils literal notranslate"><span class="pre">torch.jit.trace()</span></code><a class="headerlink" href="#export-model-using-torch-jit-trace" title="Permalink to this heading"></a></h3>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nv">iter</span><span class="o">=</span><span class="m">468000</span>
+<span class="nv">avg</span><span class="o">=</span><span class="m">16</span>
+
+./lstm_transducer_stateless2/export.py <span class="se">\</span>
+  --exp-dir ./lstm_transducer_stateless2/exp <span class="se">\</span>
+  --bpe-model data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --iter <span class="nv">$iter</span> <span class="se">\</span>
+  --avg  <span class="nv">$avg</span> <span class="se">\</span>
+  --jit-trace <span class="m">1</span>
+</pre></div>
+</div>
+<p>It will generate 3 files:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/encoder_jit_trace.pt</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/decoder_jit_trace.pt</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/joiner_jit_trace.pt</span></code></p></li>
+</ul>
+</div></blockquote>
+<p>To use the generated files with <code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/jit_pretrained.py</span></code>:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./lstm_transducer_stateless2/jit_pretrained.py <span class="se">\</span>
+  --bpe-model ./data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --encoder-model-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace.pt <span class="se">\</span>
+  --decoder-model-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace.pt <span class="se">\</span>
+  --joiner-model-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace.pt <span class="se">\</span>
+  /path/to/foo.wav <span class="se">\</span>
+  /path/to/bar.wav
+</pre></div>
+</div>
+</section>
+<section id="export-model-for-ncnn">
+<h3>Export model for ncnn<a class="headerlink" href="#export-model-for-ncnn" title="Permalink to this heading"></a></h3>
+<p>We support exporting pretrained LSTM transducer models to
+<a class="reference external" href="https://github.com/tencent/ncnn">ncnn</a> using
+<a class="reference external" href="https://github.com/Tencent/ncnn/tree/master/tools/pnnx">pnnx</a>.</p>
+<p>First, let us install a modified version of <code class="docutils literal notranslate"><span class="pre">ncnn</span></code>:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>git clone https://github.com/csukuangfj/ncnn
+<span class="nb">cd</span> ncnn
+git submodule update --recursive --init
+python3 setup.py bdist_wheel
+ls -lh dist/
+pip install ./dist/*.whl
+
+<span class="c1"># now build pnnx</span>
+<span class="nb">cd</span> tools/pnnx
+mkdir build
+<span class="nb">cd</span> build
+cmake ..
+make -j4
+<span class="nb">export</span> <span class="nv">PATH</span><span class="o">=</span><span class="nv">$PWD</span>/src:<span class="nv">$PATH</span>
+
+./src/pnnx
+</pre></div>
+</div>
+<div class="admonition note">
+<p class="admonition-title">Note</p>
+<p>We assume that you have added the path to the binary <code class="docutils literal notranslate"><span class="pre">pnnx</span></code> to the
+environment variable <code class="docutils literal notranslate"><span class="pre">PATH</span></code>.</p>
+</div>
+<p>Second, let us export the model using <code class="docutils literal notranslate"><span class="pre">torch.jit.trace()</span></code> that is suitable
+for <code class="docutils literal notranslate"><span class="pre">pnnx</span></code>:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nv">iter</span><span class="o">=</span><span class="m">468000</span>
+<span class="nv">avg</span><span class="o">=</span><span class="m">16</span>
+
+./lstm_transducer_stateless2/export.py <span class="se">\</span>
+  --exp-dir ./lstm_transducer_stateless2/exp <span class="se">\</span>
+  --bpe-model data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --iter <span class="nv">$iter</span> <span class="se">\</span>
+  --avg  <span class="nv">$avg</span> <span class="se">\</span>
+  --pnnx <span class="m">1</span>
+</pre></div>
+</div>
+<p>It will generate 3 files:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt</span></code></p></li>
+</ul>
+</div></blockquote>
+<p>Third, convert torchscript model to <code class="docutils literal notranslate"><span class="pre">ncnn</span></code> format:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>pnnx ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt
+pnnx ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt
+pnnx ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt
+</pre></div>
+</div>
+<p>It will generate the following files:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param</span></code></p></li>
+<li><p><code class="docutils literal notranslate"><span class="pre">./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin</span></code></p></li>
+</ul>
+</div></blockquote>
+<p>To use the above generated files, run:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./lstm_transducer_stateless2/ncnn-decode.py <span class="se">\</span>
+  --bpe-model-filename ./data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param <span class="se">\</span>
+  --encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin <span class="se">\</span>
+  --decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param <span class="se">\</span>
+  --decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin <span class="se">\</span>
+  --joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param <span class="se">\</span>
+  --joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin <span class="se">\</span>
+  /path/to/foo.wav
+</pre></div>
+</div>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./lstm_transducer_stateless2/streaming-ncnn-decode.py <span class="se">\</span>
+  --bpe-model-filename ./data/lang_bpe_500/bpe.model <span class="se">\</span>
+  --encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param <span class="se">\</span>
+  --encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin <span class="se">\</span>
+  --decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param <span class="se">\</span>
+  --decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin <span class="se">\</span>
+  --joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param <span class="se">\</span>
+  --joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin <span class="se">\</span>
+  /path/to/foo.wav
+</pre></div>
+</div>
+<p>To use the above generated files in C++, please see
+<a class="reference external" href="https://github.com/k2-fsa/sherpa-ncnn">https://github.com/k2-fsa/sherpa-ncnn</a></p>
+<p>It is able to generate a statically linked library that can be run on Linux, Windows,
+macOS, Raspberry Pi, etc.</p>
+</section>
+</section>
+<section id="download-pretrained-models">
+<h2>Download pretrained models<a class="headerlink" href="#download-pretrained-models" title="Permalink to this heading"></a></h2>
+<p>If you don’t want to train from scratch, you can download the pretrained models
+by visiting the following links:</p>
+<blockquote>
+<div><ul class="simple">
+<li><p><a class="reference external" href="https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03">https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03</a></p></li>
+<li><p><a class="reference external" href="https://huggingface.co/Zengwei/icefall-asr-librispeech-lstm-transducer-stateless-2022-08-18">https://huggingface.co/Zengwei/icefall-asr-librispeech-lstm-transducer-stateless-2022-08-18</a></p></li>
+</ul>
+<p>See <a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md">https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md</a>
+for the details of the above pretrained models.</p>
+</div></blockquote>
+<p>You can find more usages of the pretrained models in
+<a class="reference external" href="https://k2-fsa.github.io/sherpa/python/streaming_asr/lstm/index.html">https://k2-fsa.github.io/sherpa/python/streaming_asr/lstm/index.html</a></p>
+</section>
+</section>
+
+
+           </div>
+          </div>
+          <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
+        <a href="conformer_ctc.html" class="btn btn-neutral float-left" title="Conformer CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
+        <a href="../timit/index.html" class="btn btn-neutral float-right" title="TIMIT" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
+    </div>
+
+  <hr/>
+
+  <div role="contentinfo">
+    <p>&#169; Copyright 2021, icefall development team.</p>
+  </div>
+
+  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
+    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
+    provided by <a href="https://readthedocs.org">Read the Docs</a>.
+   
+
+</footer>
+        </div>
+      </div>
+    </section>
+  </div>
+  <script>
+      jQuery(function () {
+          SphinxRtdTheme.Navigation.enable(true);
+      });
+  </script> 
+
+</body>
+</html>
\ No newline at end of file
diff --git a/recipes/librispeech/tdnn_lstm_ctc.html b/recipes/librispeech/tdnn_lstm_ctc.html
index a20f7c0e7..417a6b4ed 100644
--- a/recipes/librispeech/tdnn_lstm_ctc.html
+++ b/recipes/librispeech/tdnn_lstm_ctc.html
@@ -53,6 +53,7 @@
 </ul>
 </li>
 <li class="toctree-l3"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
+<li class="toctree-l3"><a class="reference internal" href="lstm_pruned_stateless_transducer.html">Transducer</a></li>
 </ul>
 </li>
 <li class="toctree-l2"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
diff --git a/recipes/timit/index.html b/recipes/timit/index.html
index fc90b5a52..adadaf330 100644
--- a/recipes/timit/index.html
+++ b/recipes/timit/index.html
@@ -20,7 +20,7 @@
     <link rel="index" title="Index" href="../../genindex.html" />
     <link rel="search" title="Search" href="../../search.html" />
     <link rel="next" title="TDNN-LiGRU-CTC" href="tdnn_ligru_ctc.html" />
-    <link rel="prev" title="Conformer CTC" href="../librispeech/conformer_ctc.html" /> 
+    <link rel="prev" title="Transducer" href="../librispeech/lstm_pruned_stateless_transducer.html" /> 
 </head>
 
 <body class="wy-body-for-nav"> 
@@ -95,7 +95,7 @@
            </div>
           </div>
           <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
-        <a href="../librispeech/conformer_ctc.html" class="btn btn-neutral float-left" title="Conformer CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
+        <a href="../librispeech/lstm_pruned_stateless_transducer.html" class="btn btn-neutral float-left" title="Transducer" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
         <a href="tdnn_ligru_ctc.html" class="btn btn-neutral float-right" title="TDNN-LiGRU-CTC" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
     </div>
 
diff --git a/recipes/yesno/tdnn.html b/recipes/yesno/tdnn.html
index 56b6c047f..4da01fee7 100644
--- a/recipes/yesno/tdnn.html
+++ b/recipes/yesno/tdnn.html
@@ -281,7 +281,7 @@ the following screenshot:</p>
 <div><figure class="align-center" id="id1">
 <a class="reference external image-reference" href="https://tensorboard.dev/experiment/yKUbhb5wRmOSXYkId1z9eg/"><img alt="TensorBoard screenshot" src="../../_images/tdnn-tensorboard-log.png" style="width: 600px;" /></a>
 <figcaption>
-<p><span class="caption-number">Fig. 5 </span><span class="caption-text">TensorBoard screenshot.</span><a class="headerlink" href="#id1" title="Permalink to this image"></a></p>
+<p><span class="caption-number">Fig. 6 </span><span class="caption-text">TensorBoard screenshot.</span><a class="headerlink" href="#id1" title="Permalink to this image"></a></p>
 </figcaption>
 </figure>
 </div></blockquote>
diff --git a/searchindex.js b/searchindex.js
index 51d4ef785..561b2fb51 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "recipes/aishell/conformer_ctc", "recipes/aishell/index", "recipes/aishell/stateless_transducer", "recipes/aishell/tdnn_lstm_ctc", "recipes/index", "recipes/librispeech/conformer_ctc", "recipes/librispeech/index", "recipes/librispeech/tdnn_lstm_ctc", "recipes/timit/index", "recipes/timit/tdnn_ligru_ctc", "recipes/timit/tdnn_lstm_ctc", "recipes/yesno/index", "recipes/yesno/tdnn"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "recipes/aishell/conformer_ctc.rst", "recipes/aishell/index.rst", "recipes/aishell/stateless_transducer.rst", "recipes/aishell/tdnn_lstm_ctc.rst", "recipes/index.rst", "recipes/librispeech/conformer_ctc.rst", "recipes/librispeech/index.rst", "recipes/librispeech/tdnn_lstm_ctc.rst", "recipes/timit/index.rst", "recipes/timit/tdnn_ligru_ctc.rst", "recipes/timit/tdnn_lstm_ctc.rst", "recipes/yesno/index.rst", "recipes/yesno/tdnn.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Recipes", "Conformer CTC", "LibriSpeech", "TDNN-LSTM-CTC", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC"], "terms": {"we": [0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21], "us": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 14, 16, 18, 19, 21], "tool": 0, "make": [0, 1, 3, 9, 11, 14], "consist": [0, 11], "possibl": [0, 2, 3, 8, 9, 14], "black": 0, "format": [0, 9, 11, 12, 14, 
16, 18, 19, 21], "flake8": 0, "check": [0, 14], "qualiti": [0, 10], "isort": 0, "sort": [0, 8], "import": 0, "The": [0, 1, 2, 6, 8, 9, 10, 12, 14, 16, 18, 19, 21], "version": [0, 8, 9, 11, 12, 14, 16, 18, 19], "abov": [0, 8, 9, 10, 11, 12, 14, 21], "ar": [0, 1, 3, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21], "12": [0, 8, 9, 11, 12, 14, 18, 21], "6b0": 0, "3": [0, 7, 12, 16, 21], "9": [0, 8, 9, 11, 12, 14, 16, 18, 21], "2": [0, 7, 16, 18, 19, 21], "5": [0, 9, 11, 12, 14, 16, 18, 19, 21], "after": [0, 1, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "run": [0, 2, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "command": [0, 1, 8, 9, 11, 12, 14, 16, 18, 19, 21], "git": [0, 8, 9, 11, 12, 14, 16, 18, 19, 21], "clone": [0, 8, 9, 11, 12, 14, 16, 18, 19, 21], "http": [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 14, 16, 18, 19, 21], "github": [0, 2, 5, 8, 9, 11, 12, 14, 16, 18, 19, 21], "com": [0, 2, 5, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "k2": [0, 2, 5, 6, 7, 9, 11, 12, 14, 16, 18, 19], "fsa": [0, 2, 5, 6, 8, 9, 11, 14], "icefal": [0, 2, 3, 5, 6, 9, 11, 12, 13, 14, 16, 18, 19, 21], "cd": [0, 1, 2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "pip": [0, 1, 8, 11], "instal": [0, 1, 4, 6, 7, 21], "pre": [0, 3, 4, 6, 7, 8], "commit": 0, "whenev": 0, "you": [0, 1, 2, 5, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "automat": [0, 6], "hook": 0, "invok": 0, "fail": [0, 8], "If": [0, 2, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "ani": [0, 8, 9, 11, 12, 14, 21], "your": [0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 16, 18, 19, 21], "wa": [0, 8, 14, 16], "success": [0, 8], "pleas": [0, 1, 2, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "fix": [0, 8, 14], "issu": [0, 8, 14], "report": [0, 8], "some": [0, 1, 9, 11, 12, 14, 16, 18, 19, 21], "i": [0, 1, 2, 6, 8, 9, 10, 11, 12, 14, 16, 18, 19, 21], "e": [0, 2, 9, 11, 12, 14, 16, 18, 19, 21], "modifi": [0, 9, 12, 14, 16, 18, 19, 21], "file": [0, 2, 6, 9, 11, 12, 14, 16, 18, 19, 21], "place": [0, 8, 11, 14, 16], "so": [0, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "statu": 0, "failur": 0, "see": [0, 1, 6, 9, 
11, 12, 14, 16, 18, 19, 21], "which": [0, 2, 6, 9, 10, 11, 12, 14, 16, 18, 19, 21], "ha": [0, 2, 9, 11, 12, 14, 16, 18, 19], "been": [0, 11], "befor": [0, 1, 9, 11, 12, 14], "further": 0, "chang": [0, 9, 11, 12, 14, 16, 18, 19, 21], "all": [0, 5, 6, 9, 11, 12, 14, 16, 18, 19, 21], "again": [0, 21], "should": [0, 2, 9, 11, 12, 14, 16, 18, 19, 21], "succe": 0, "thi": [0, 2, 3, 4, 8, 9, 11, 12, 13, 14, 16, 18, 19, 21], "time": [0, 9, 11, 12, 14, 16, 18, 19, 21], "succeed": 0, "want": [0, 8, 9, 11, 12, 14, 16, 18, 19, 21], "can": [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 14, 16, 18, 19, 21], "do": [0, 2, 9, 11, 12, 14, 16, 18, 19, 21], "21": [0, 8, 9, 11, 14, 18, 19], "your_changed_fil": 0, "py": [0, 2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 5, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "prepar": [1, 3], "environ": [1, 9, 10, 11, 12, 14, 16, 18, 19, 21], "doc": 1, "r": [1, 8, 18, 19], "requir": [1, 8], "txt": [1, 8, 9, 11, 12, 14, 16, 18, 19, 21], "set": [1, 8, 9, 11, 12, 14, 21], "up": [1, 8, 9, 12, 14, 16], "readi": [1, 9, 14], "refer": [1, 2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 8, 9, 11, 14], "local": [1, 8], "preview": 1, "what": [1, 2, 8, 11], "look": [1, 2, 5, 8, 9, 11, 12, 14], "like": [1, 2, 6, 8, 9, 11, 12, 14, 21], "publish": [1, 10], "html": [1, 2, 8], "gener": [1, 9, 11, 12, 14], "view": [1, 9, 11, 12, 14, 21], "follow": [1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 16, 18, 19, 21], "python3": [1, 8], "m": [1, 11, 18, 19], "server": [1, 6, 8], "It": [1, 2, 4, 8, 9, 10, 11, 12, 14, 16, 18, 19, 21], "print": [1, 9, 11, 12, 14, 16, 18, 19, 21], "serv": 1, "0": [1, 7, 9, 11, 12, 14, 16, 18, 19, 21], "port": 1, "8000": [1, 21], "open": [1, 10, 11, 14], "browser": [1, 4, 6], "go": [1, 9, 11, 14], "read": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "code": [2, 3, 7, 9, 14, 16, 18, 19, 21], "style": [2, 3, 7], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 8, 9, 11, 14], 
"recommend": [2, 8, 9, 11, 12, 14], "test": [2, 7, 9, 11, 12, 14, 16, 18, 19], "valid": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "dataset": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "lhots": [2, 7, 9, 11, 14], "readthedoc": [2, 8], "io": [2, 8], "en": [2, 8], "latest": [2, 6, 8, 14, 16], "index": [2, 8], "yesno": [2, 7, 8, 13, 21], "veri": [2, 3, 11, 18, 19, 21], "good": 2, "exampl": [2, 6, 7, 16, 18, 19, 21], "speech": [2, 6, 7, 8, 10, 11, 13, 21], "pull": [2, 9, 11, 14], "380": [2, 19], "show": [2, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "add": [2, 9, 11, 12, 13], "new": [2, 3, 6, 8, 9, 10, 11, 12, 14, 16, 21], "suppos": 2, "would": [2, 8, 14, 16], "name": [2, 9, 11], "foo": [2, 9, 14], "eg": [2, 5, 8, 9, 11, 12, 14, 16, 18, 19, 21], "mkdir": [2, 9, 11, 12, 14, 16, 18, 19, 21], "p": [2, 8, 11, 18, 19], "asr": [2, 5, 8, 9, 11, 12, 14, 16, 18, 19, 21], "touch": 2, "sh": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "chmod": 2, "x": 2, "simpl": [2, 11], "own": 2, "otherwis": [2, 9, 11, 14], "librispeech": [2, 5, 7, 13, 14, 16], "assum": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "fanci": 2, "call": 2, "bar": [2, 9, 14], "organ": 2, "wai": [2, 3], "readm": [2, 9, 11, 12, 14, 16, 18, 19, 21], "md": [2, 5, 9, 11, 12, 14, 16, 18, 19, 21], "asr_datamodul": [2, 8], "pretrain": [2, 9, 11, 12, 14, 16, 18, 19, 21], "For": [2, 5, 9, 11, 12, 14, 16, 18, 19, 21], "instanc": [2, 5, 9, 11, 12, 14, 16, 18, 19, 21], "tdnn": [2, 8, 10, 13, 15, 17, 20], "its": [2, 11], "directori": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "structur": 2, "descript": [2, 9, 11, 12, 14, 16, 18, 19, 21], "contain": [2, 7, 9, 11, 12, 13, 14, 16, 18, 19, 21], "inform": [2, 9, 11, 12, 14, 16, 18, 19, 21], "g": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "wer": [2, 8, 16, 18, 19, 21], "etc": [2, 9, 11, 12, 14, 16, 18, 19, 21], "provid": [2, 6, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21], "pytorch": [2, 7, 11], "dataload": [2, 8], "take": [2, 21], "input": [2, 9, 11, 12, 14, 16, 18, 19, 21], "checkpoint": [2, 8, 9, 11, 12, 14, 16, 18, 
19, 21], "save": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "dure": [2, 6, 9, 11, 12, 14, 16, 18, 19, 21], "stage": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "definit": 2, "neural": [2, 9, 14], "network": [2, 9, 11, 14], "script": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "infer": 2, "tdnn_lstm_ctc": [2, 12, 16, 19], "conformer_ctc": [2, 9, 14], "get": [2, 6, 8, 9, 11, 12, 14, 16, 21], "feel": 2, "result": [2, 5, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "everi": 2, "kept": 2, "self": 2, "toler": 2, "duplic": 2, "among": [2, 8], "differ": [2, 8, 9, 10, 14], "invoc": 2, "help": [2, 9, 11, 12, 14, 16, 18, 19, 21], "blob": [2, 5], "master": [2, 5, 11], "transform": [2, 9, 14], "conform": [2, 10, 11, 13, 15], "base": [2, 9, 11, 12, 14], "lstm": [2, 10, 13, 15, 17], "attent": [2, 11, 12], "lm": [2, 8, 11, 16, 18, 19, 21], "rescor": [2, 12, 16, 18, 19, 21], "demonstr": [2, 4, 6], "consid": 2, "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 9, 11, 12, 14], "mani": 3, "two": [3, 9, 11, 12, 14, 16, 18, 19, 21], "them": [3, 4, 5, 6, 9, 11, 12, 14, 16, 18, 19, 21], "To": [3, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "document": [3, 7], "repositori": 3, "recip": [3, 5, 7, 8, 9, 11, 12, 14, 16, 18, 19, 21], "In": [3, 6, 9, 11, 12, 14, 16, 18, 19, 21], "page": [3, 6, 9, 11, 12, 13, 14, 16, 18, 19, 21], "describ": [3, 4, 9, 11, 12, 14, 16, 18, 19], "how": [3, 4, 6, 7, 8, 9, 11, 12, 14, 16, 18, 19, 21], "creat": [3, 7, 9, 11, 12, 14, 16, 18, 19, 21], "data": [3, 10], "train": [3, 4, 6, 7], "decod": [3, 6], "model": [3, 4, 6, 7, 8], "section": [4, 8, 9, 14], "find": [4, 5, 6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "also": [4, 5, 8, 9, 11, 12, 14, 21], "try": [4, 6], "from": [4, 6, 8, 9, 10, 11, 12, 14, 16, 18, 19, 21], "within": [4, 6], "without": [4, 6, 9, 14], "anyth": [4, 6], "space": [4, 7], "youtub": [4, 7, 14, 16], "video": [4, 7, 14, 16], "upload": [5, 6, 9, 11, 12, 14, 16, 18, 19, 21], "huggingfac": [5, 7, 9, 11, 12, 14, 16, 18, 19, 21], "co": [5, 
6, 9, 10, 11, 12, 14, 16, 18, 19, 21], "visit": [5, 6], "link": [5, 8], "search": [5, 6], "specif": [5, 11], "correspond": [5, 6], "aishel": [5, 7, 9, 11, 12, 13], "gigaspeech": 5, "wenetspeech": 5, "integr": 6, "framework": 6, "sherpa": 6, "need": [6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "download": [6, 7, 10], "window": 6, "maco": 6, "linux": 6, "even": [6, 8], "ipad": 6, "phone": 6, "start": [6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "address": [6, 11], "recognit": [6, 7, 10, 11, 13, 21], "screenshot": [6, 9, 11, 12, 14, 21], "select": [6, 16, 18, 19, 21], "languag": [6, 9, 11, 12], "current": [6, 8, 11, 13], "chines": [6, 10, 11], "english": [6, 21], "target": 6, "method": [6, 8, 9, 11, 12, 14, 16, 18, 19], "greedi": 6, "modified_beam_search": [6, 11], "choos": [6, 8], "number": [6, 9, 11, 12, 14, 16, 18, 19, 21], "activ": 6, "path": [6, 9, 11, 12, 14], "either": [6, 9, 11, 12, 14], "record": [6, 9, 10, 11, 12, 14, 16, 18, 19, 21], "click": [6, 8, 9, 11, 12, 14, 21], "button": 6, "submit": 6, "wait": 6, "moment": 6, "an": [6, 8, 9, 10, 11, 14, 21], "when": [6, 11, 14], "bottom": 6, "part": [6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "tabl": 6, "one": [6, 9, 11, 12, 14, 16, 18, 19, 21], "next": [6, 8, 14, 16], "gen": [6, 8, 14, 16], "kaldi": [6, 8, 14, 16], "subscrib": [6, 8, 14, 16], "channel": [6, 8, 9, 11, 12, 14, 16, 18, 19, 21], "nadira": [6, 8, 14, 16], "povei": [6, 8, 14, 16], "www": [6, 8, 10, 14, 16], "uc_vaumpkminz1pnkfxan9mw": [6, 8, 14, 16], "torchaudio": 7, "1": [7, 16, 18, 19, 21], "timit": [7, 13, 18, 19], "contribut": 7, "depend": [8, 9, 14], "step": [8, 9, 11, 12, 14, 21], "order": [8, 9, 12, 14, 16, 18, 19], "matter": 8, "org": [8, 10, 11], "least": 8, "v1": [8, 9, 12, 14, 16, 18, 19], "alreadi": 8, "don": [8, 9, 12, 14, 16, 18, 19, 21], "t": [8, 9, 11, 12, 14, 16, 18, 19, 21], "replac": 8, "compil": [8, 9, 11, 14], "against": 8, "strongli": 8, "collect": 8, "variabl": [8, 9, 12, 14], "pythonpath": 8, "point": [8, 9, 12, 14], "folder": [8, 9, 11, 12, 14, 
16, 18, 19, 21], "tmp": [8, 9, 11, 12, 14, 16, 18, 19, 21], "setup": [8, 9, 11, 12, 14, 16, 18, 19, 21], "export": [8, 9, 11, 12, 14, 16, 18, 19, 21], "put": 8, "sever": [8, 9, 11, 12, 14, 16, 18, 19, 21], "same": [8, 9, 11, 12, 14, 16, 18, 19, 21], "switch": [8, 9, 14], "just": 8, "about": [8, 11], "virtualenv": 8, "8": [8, 9, 11, 14, 16, 21], "cpython3": 8, "6": [8, 9, 11, 14, 16, 18, 19], "final": [8, 14, 16], "64": [8, 11], "1540m": 8, "creator": 8, "cpython3posix": 8, "dest": 8, "ceph": [8, 9, 11, 14], "fj": [8, 11, 14], "fangjun": [8, 11, 14], "clear": 8, "fals": [8, 9, 11, 14], "no_vcs_ignor": 8, "global": 8, "seeder": 8, "fromappdata": 8, "bundl": 8, "setuptool": 8, "wheel": 8, "via": 8, "copi": 8, "app_data_dir": 8, "root": 8, "share": 8, "v": [8, 14, 18, 19], "irtualenv": 8, "ad": [8, 9, 11, 12, 14, 21], "seed": 8, "packag": 8, "57": [8, 14, 16], "36": [8, 11, 14], "bashactiv": 8, "cshellactiv": 8, "fishactiv": 8, "powershellactiv": 8, "pythonactiv": 8, "xonshactiv": 8, "sourc": [8, 9, 10, 11, 14], "bin": [8, 9, 14], "dev20210822": 8, "cpu": [8, 9, 21], "torch1": 8, "f": [8, 18, 19], "nightli": 8, "whl": 8, "2bcpu": 8, "cp38": 8, "linux_x86_64": 8, "mb": 8, "________________________________": 8, "185": [8, 9, 14, 21], "kb": [8, 18, 19], "graphviz": 8, "17": [8, 9, 14, 18, 19], "py3": 8, "none": [8, 9, 14], "18": [8, 9, 11, 12, 14, 16, 18, 19], "torch": [8, 9, 11, 14], "cach": 8, "manylinux1_x86_64": 8, "831": [8, 11, 19], "type": [8, 9, 11, 14, 21], "extens": 8, "typing_extens": 8, "10": [8, 9, 11, 12, 14, 16, 18, 19, 21], "26": [8, 11, 14, 19], "successfulli": 8, "probabl": [8, 11], "cuda": [8, 9, 11, 12, 14, 16, 18, 19], "req": 8, "7b1b76ge": 8, "q": 8, "audioread": 8, "soundfil": 8, "post1": 8, "py2": 8, "7": [8, 9, 12, 14, 16, 18, 19], "97": [8, 9], "cytoolz": 8, "11": [8, 9, 11, 12, 14, 16, 18, 19, 21], "manylinux_2_17_x86_64": 8, "manylinux2014_x86_64": 8, "dataclass": 8, "14": [8, 9, 14, 16, 18], "h5py": 8, "manylinux_2_12_x86_64": 8, 
"manylinux2010_x86_64": 8, "684": [8, 9, 21], "intervaltre": 8, "lilcom": 8, "numpi": 8, "15": [8, 11, 12, 14, 18, 21], "40": [8, 12, 14, 16, 18, 19], "pyyaml": 8, "662": 8, "tqdm": 8, "62": [8, 14, 16], "76": [8, 21], "73": 8, "satisfi": 8, "lib": 8, "site": 8, "dev": [8, 9, 11, 12, 14, 16, 18, 19, 21], "2a1410b": 8, "clean": [8, 9, 11, 14, 16], "toolz": 8, "55": [8, 12, 14, 18], "sortedcontain": 8, "29": [8, 9, 11, 12, 14, 16, 18, 19], "cffi": 8, "411": [8, 14], "pycpars": 8, "20": [8, 9, 11, 12, 14, 16, 18, 19, 21], "112": 8, "pypars": 8, "67": 8, "done": [8, 9, 11, 12, 14, 16, 18, 19, 21], "filenam": 8, "dev_2a1410b_clean": 8, "size": [8, 9, 11, 12, 14, 16, 18, 19, 21], "342242": 8, "sha256": 8, "f683444afa4dc0881133206b4646a": 8, "9d0f774224cc84000f55d0a67f6e4a37997": 8, "store": [8, 14], "ephem": 8, "ftu0qysz": 8, "7f": 8, "7a": 8, "8e": 8, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 8, "warn": 8, "built": 8, "invalid": [8, 14], "metadata": [8, 18, 19], "mandat": 8, "pep": 8, "440": 8, "packa": 8, "ging": 8, "deprec": [8, 11], "legaci": 8, "becaus": 8, "could": [8, 9, 12], "A": [8, 9, 11, 12, 14, 16], "discuss": 8, "regard": 8, "pypa": 8, "sue": 8, "8368": 8, "inter": 8, "valtre": 8, "sor": 8, "tedcontain": 8, "remot": 8, "enumer": 8, "object": [8, 9, 11, 12, 21], "500": [8, 11, 14], "count": 8, "100": [8, 9, 11, 12, 14], "compress": 8, "308": [8, 9, 11, 12], "total": [8, 9, 11, 12, 14, 16, 21], "delta": 8, "263": 8, "reus": 8, "307": 8, "102": [8, 9], "pack": 8, "receiv": 8, "172": 8, "49": [8, 14, 19, 21], "kib": 8, "385": 8, "00": [8, 9, 11, 12, 14, 16, 18, 19, 21], "resolv": 8, "kaldilm": 8, "tar": 8, "gz": 8, "48": [8, 9, 11], "574": 8, "kaldialign": 8, "sentencepiec": [8, 14], "96": 8, "tensorboard": [8, 9, 11, 12, 14, 16, 18, 19, 21], "41": [8, 9, 11, 18, 21], "line": 8, "absl": 8, "absl_pi": 8, "13": [8, 11, 12, 14, 16, 18], "132": 8, "googl": 8, "auth": 8, "oauthlib": 8, "google_auth_oauthlib": 8, "grpcio": 8, "24": [8, 12, 16, 18, 19, 21], 
"39": [8, 11, 14, 16, 18], "ment": 8, "requi": 8, "rement": 8, "protobuf": 8, "manylinux_2_5_x86_64": 8, "werkzeug": 8, "288": 8, "tensorboard_data_serv": 8, "google_auth": 8, "35": [8, 11, 14], "152": 8, "request": 8, "plugin": 8, "wit": 8, "tensorboard_plugin_wit": 8, "781": 8, "markdown": 8, "six": 8, "16": [8, 9, 11, 12, 14, 16, 18, 19, 21], "cachetool": 8, "rsa": 8, "34": 8, "pyasn1": 8, "modul": 8, "pyasn1_modul": 8, "155": 8, "requests_oauthlib": 8, "23": [8, 9, 11, 12, 14, 18, 19, 21], "77": [8, 14], "urllib3": 8, "27": [8, 9, 11, 16, 19], "138": [8, 9, 11], "certifi": 8, "2017": 8, "2021": [8, 9, 12, 14, 16, 18, 19, 21], "30": [8, 9, 11, 12, 14, 21], "145": 8, "charset": 8, "normal": [8, 16, 18, 19, 21], "charset_norm": 8, "idna": 8, "59": [8, 12, 14], "146": 8, "897233": 8, "eccb906cafcd45bf9a7e1a1718e4534254bfb": 8, "f4c0d0cbc66eee6c88d68a63862": 8, "85": 8, "7d": 8, "63": [8, 11], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 8, "etool": 8, "oaut": 8, "hlib": 8, "let": [8, 9, 14], "u": [8, 9, 11, 12, 14, 21], "log": [8, 16, 18, 19, 21], "08": [8, 14, 16, 18, 19, 21], "19": [8, 9, 14, 16, 18, 19], "main": [8, 9, 14], "dl_dir": [8, 9, 12, 14], "waves_yesno": 8, "49mb": 8, "03": [8, 11, 14, 18, 19], "39mb": 8, "manifest": 8, "31": [8, 14], "42": [8, 9, 14, 21], "comput": [8, 9, 11, 12, 16, 18, 19, 21], "fbank": [8, 9, 11, 12, 14, 16, 18, 19, 21], "32": [8, 9, 11, 12], "803": 8, "info": [8, 9, 11, 12, 14, 16, 18, 19, 21], "compute_fbank_yesno": 8, "52": [8, 9, 14], "process": [8, 9, 11, 12, 14], "extract": [8, 9, 11, 12, 14, 16, 18, 19, 21], "featur": [8, 9, 11, 12, 14, 16, 18, 19, 21], "_______________________________________________________________": 8, "90": 8, "01": [8, 11, 12, 14], "80": [8, 9, 11, 14], "57it": 8, "085": 8, "______________________________________________________________": 8, "248": [8, 11], "21it": 8, "lang": [8, 11, 14], "fcordre9": 8, "kaldilm_6899d26f2d684ad48f21025950cd2866": 8, "csrc": [8, 14], "arpa_file_pars": 8, "cc": 
8, "void": 8, "arpafilepars": 8, "rea": 8, "d": [8, 18, 19], "std": 8, "istream": 8, "79": 8, "140": [8, 12], "gram": [8, 9, 11, 12, 14, 16, 18, 19], "89": [8, 9], "hlg": [8, 16, 18, 19, 21], "928": 8, "compile_hlg": 8, "120": 8, "lang_phon": [8, 12, 16, 18, 19, 21], "929": [8, 11], "lexicon": [8, 9, 11, 12, 14, 21], "116": 8, "convert": [8, 14], "l": [8, 11, 18, 19, 21], "pt": [8, 9, 11, 12, 14, 16, 18, 19, 21], "linv": [8, 11, 14, 21], "931": 8, "ctc_topo": 8, "max_token_id": 8, "932": 8, "load": [8, 9, 11, 12, 14, 16, 18, 19, 21], "fst": [8, 11, 21], "intersect": 8, "933": 8, "lg": 8, "shape": 8, "66": 8, "connect": [8, 14, 16], "68": [8, 14], "70": 8, "class": [8, 14], "tensor": [8, 9, 11, 12, 14, 21], "71": [8, 14, 16], "determin": 8, "934": 8, "74": 8, "_k2": 8, "raggedint": 8, "remov": [8, 9, 11, 12, 14, 16, 18, 19], "disambigu": 8, "symbol": [8, 11], "87": 8, "remove_epsilon": 8, "935": 8, "92": [8, 14], "arc": 8, "95": [8, 10], "compos": 8, "h": 8, "105": [8, 14], "936": 8, "107": [8, 16], "123": 8, "now": [8, 9, 14, 16, 18, 19], "cuda_visible_devic": [8, 9, 11, 12, 14, 16, 18, 19, 21], "gpu": [8, 9, 11, 12, 14, 18, 19, 21], "avail": [8, 9, 11, 14, 16, 18, 19, 21], "given": [8, 9, 11, 12, 14, 16], "below": [8, 9, 11, 12, 14, 16, 18, 19, 21], "072": 8, "465": 8, "466": 8, "exp_dir": [8, 11, 14], "posixpath": [8, 11, 14], "exp": [8, 9, 11, 12, 14, 16, 18, 19, 21], "lang_dir": [8, 11, 14], "lr": [8, 11], "feature_dim": [8, 9, 11, 14, 21], "weight_decai": 8, "1e": 8, "06": [8, 12, 14, 16, 21], "start_epoch": 8, "best_train_loss": 8, "inf": 8, "best_valid_loss": 8, "best_train_epoch": 8, "best_valid_epoch": 8, "batch_idx_train": 8, "log_interv": 8, "valid_interv": 8, "beam_siz": [8, 11], "reduct": 8, "sum": 8, "use_doub": 8, "le_scor": 8, "true": [8, 9, 11, 14, 16, 18, 19, 21], "world_siz": 8, "master_port": 8, "12354": 8, "num_epoch": 8, "feature_dir": [8, 14], "max_dur": [8, 14], "bucketing_sampl": [8, 14], "num_bucket": [8, 14], "concatenate_cut": [8, 14], 
"duration_factor": [8, 14], "gap": [8, 14], "on_the_fly_feat": [8, 14], "shuffl": [8, 14], "return_cut": [8, 14], "num_work": [8, 14], "074": 8, "113": [8, 11, 14], "098": [8, 16], "cut": [8, 14], "240": [8, 9, 21], "149": [8, 14], "200": [8, 9, 14, 18, 19, 21], "singlecutsampl": 8, "206": [8, 14], "219": [8, 11, 14], "246": [8, 11, 14, 18, 19], "357": 8, "416": 8, "epoch": [8, 9, 11, 12, 14, 16, 18, 19, 21], "batch": [8, 9, 11, 12, 14], "avg": [8, 11, 14, 16, 18, 19, 21], "loss": [8, 9, 12, 14, 16, 18, 19, 21], "0789": 8, "848": 8, "5356": 8, "7556": 8, "301": [8, 14], "432": [8, 14], "9972": 8, "best": [8, 9, 12, 14], "805": 8, "2436": 8, "5717": 8, "33": [8, 9, 10, 11, 14, 18], "109": [8, 9, 14], "4167": 8, "121": [8, 16], "325": 8, "2214": 8, "798": [8, 11], "0781": 8, "1343": 8, "065": 8, "0859": 8, "556": 8, "0421": 8, "0975": 8, "810": 8, "0431": 8, "824": 8, "657": 8, "0109": 8, "984": [8, 14], "0093": 8, "0096": 8, "50": [8, 14, 18], "239": [8, 11], "0104": 8, "0101": 8, "569": 8, "0092": 8, "819": [8, 18], "835": 8, "51": [8, 9, 14, 21], "024": 8, "0105": 8, "317": 8, "0099": 8, "0097": 8, "552": 8, "0108": 8, "869": 8, "0102": 8, "126": [8, 14], "128": [8, 14], "537": [8, 14], "192": [8, 14], "249": 8, "250": [8, 11, 16], "lm_dir": [8, 14], "search_beam": [8, 9, 14, 21], "output_beam": [8, 9, 14, 21], "min_active_st": [8, 9, 14, 21], "max_active_st": [8, 9, 14, 21], "10000": [8, 9, 14, 21], "use_double_scor": [8, 9, 14, 21], "193": 8, "213": [8, 21], "259": [8, 9], "devic": [8, 9, 11, 12, 14, 16, 18, 19, 21], "217": [8, 9, 14], "279": [8, 14], "averag": [8, 9, 11, 12, 14, 16, 18, 19, 21], "userwarn": [8, 11], "floor_divid": 8, "futur": [8, 11, 13], "round": [8, 11], "toward": [8, 11], "trunc": [8, 11], "function": [8, 9, 11, 12, 14, 16, 18, 19, 21], "NOT": [8, 9, 11, 14, 21], "floor": [8, 11], "incorrect": [8, 11], "neg": [8, 11], "valu": [8, 9, 11, 12, 14], "keep": [8, 11], "behavior": [8, 11], "div": [8, 11], "b": [8, 11, 14, 18, 19], "rounding_mod": 
[8, 11], "actual": [8, 9, 11, 12, 14], "divis": [8, 11], "trigger": 8, "intern": 8, "aten": 8, "src": 8, "nativ": 8, "binaryop": 8, "cpp": 8, "450": [8, 9, 11, 12], "k": [8, 18, 19], "n": [8, 9, 14, 18, 19], "220": [8, 11, 12, 14], "409": 8, "190": [8, 16], "until": [8, 14], "571": [8, 14], "228": [8, 14], "transcript": [8, 9, 10, 11, 12, 14, 16, 18, 19], "recog": [8, 11, 14], "test_set": [8, 21], "572": 8, "util": [8, 14], "ins": [8, 14, 21], "del": [8, 14, 21], "sub": [8, 14, 21], "573": 8, "236": 8, "wrote": [8, 14], "detail": [8, 9, 11, 12, 14, 16, 18, 19, 21], "error": [8, 14], "stat": [8, 14], "err": [8, 11, 14], "299": 8, "congratul": [8, 9, 12, 14, 16, 18, 19, 21], "first": [8, 9, 11, 12, 14, 16, 18, 19, 21], "fun": 8, "debug": 8, "variou": [8, 13], "problem": 8, "mai": [8, 9, 11, 12, 13, 14], "encount": [8, 9, 11, 12, 14], "while": [8, 9, 11, 12, 14], "tutori": [9, 11, 12, 14, 16, 18, 19], "learn": [9, 11, 12, 14, 16, 18, 19, 21], "singl": [9, 11, 12, 14, 16, 18, 19, 21], "multipl": [9, 11, 12, 14, 16, 18, 19, 21], "1best": [9, 12, 14, 16, 18, 19], "handl": [9, 12, 14, 16, 18, 19, 21], "automag": [9, 12, 14, 16, 18, 19, 21], "stop": [9, 11, 12, 14, 16, 18, 19, 21], "control": [9, 11, 12, 14, 16, 18, 19, 21], "By": [9, 12, 14, 16, 18, 19, 21], "default": [9, 11, 12, 14, 16, 18, 19, 21], "execut": [9, 12, 14, 16, 18, 19, 21], "mean": [9, 11, 12, 14, 16, 18, 19, 21], "onli": [9, 11, 12, 13, 14, 16, 18, 19, 21], "musan": [9, 12, 14], "sai": [9, 11, 12, 14, 16, 18, 19, 21], "thei": [9, 11, 12, 14], "won": [9, 12, 14], "re": [9, 12, 14], "intal": [9, 12], "initi": [9, 12], "lf": [9, 11, 12, 14, 16, 18, 19, 21], "sudo": [9, 12], "apt": [9, 12], "permiss": [9, 12], "binari": [9, 11, 12, 14, 21], "here": [9, 11, 12, 14, 16], "pass": [9, 11, 12, 14], "commandlin": [9, 11, 12, 14], "quit": [9, 11, 12, 14], "often": [9, 11, 12, 14], "dir": [9, 11, 12, 14, 16, 18, 19, 21], "experi": [9, 11, 12, 14, 21], "num": [9, 11, 12, 14], "resum": [9, 11, 12, 14, 16, 18, 19, 21], 
"state": [9, 11, 12, 14], "world": [9, 11, 12, 14, 16], "multi": [9, 11, 12, 14], "machin": [9, 11, 12, 14], "ddp": [9, 11, 12, 14], "4": [9, 11, 12, 14, 16, 18, 19, 21], "implement": [9, 11, 12, 14], "present": [9, 11, 12, 14], "later": [9, 12, 14, 16, 18, 19], "max": [9, 11, 12, 14], "durat": [9, 11, 12, 14, 16, 18, 19, 21], "specifi": [9, 11, 12, 14, 16, 18, 19, 21], "second": [9, 11, 12, 14, 21], "over": [9, 11, 12, 14], "utter": [9, 11, 12, 14], "pad": [9, 11, 12, 14], "oom": [9, 11, 12, 14], "reduc": [9, 11, 12, 14, 16, 18, 19, 21], "v100": [9, 11, 12, 14], "nvidia": [9, 11, 12, 14], "due": [9, 11, 12, 14], "usual": [9, 11, 12, 14], "larger": [9, 11, 12, 14], "than": [9, 11, 12, 14, 16, 21], "caus": [9, 11, 12, 14], "smaller": [9, 11, 12, 14], "increas": [9, 11, 12, 14], "tune": [9, 11, 12, 14], "weight": [9, 12, 14], "decai": [9, 12, 14], "warmup": [9, 11, 12, 14], "get_param": [9, 11, 12, 14, 16, 18, 19, 21], "paramet": [9, 11, 12, 14, 16, 18, 19], "realli": [9, 12, 14], "directli": [9, 11, 12, 14], "perturb": [9, 11, 12, 14], "speed": [9, 11, 12, 14], "factor": [9, 11, 12, 14], "each": [9, 11, 12, 14], "3x150": [9, 11, 12], "hour": [9, 11, 12, 14], "These": [9, 11, 12, 14, 16, 18, 19, 21], "state_dict": [9, 11, 12, 14, 16, 18, 19, 21], "optim": [9, 11, 12, 14, 16, 18, 19, 21], "rate": [9, 11, 12, 14, 16, 18, 19, 21], "visual": [9, 11, 12, 14, 16, 18, 19, 21], "logdir": [9, 11, 12, 14, 16, 18, 19, 21], "labelsmooth": 9, "someth": [9, 11, 12, 14, 21], "tensorflow": [9, 11, 12, 14, 21], "found": [9, 11, 12, 14, 21], "continu": [9, 11, 12, 14, 21], "press": [9, 11, 12, 14, 21], "ctrl": [9, 11, 12, 14, 21], "engw8ksktzqs24zbv5dgcg": 9, "22t11": 9, "09": [9, 11, 12, 14], "scan": [9, 11, 12, 14, 21], "116068": 9, "scalar": [9, 11, 12, 14, 21], "listen": [9, 11, 12, 21], "note": [9, 11, 12, 14, 16, 18, 19, 21], "url": [9, 11, 12, 14, 21], "output": [9, 11, 12, 14, 16, 18, 19, 21], "xxxx": [9, 11, 12, 14, 16, 18, 19, 21], "text": [9, 11, 12, 14, 16, 18, 19, 21], 
"saw": [9, 11, 12, 14, 16, 18, 19, 21], "consol": [9, 11, 12, 14, 16, 18, 19, 21], "typic": [9, 11, 12, 14], "avoid": [9, 11, 14], "commonli": [9, 11, 12, 14, 16, 18, 19, 21], "nbest": [9, 14], "scale": [9, 14, 16, 18, 19], "down": [9, 14], "lattic": [9, 12, 14, 16, 18, 19], "score": [9, 14], "more": [9, 14, 21], "uniqu": [9, 14], "pkufool": [9, 12, 16], "icefall_asr_aishell_conformer_ctc": 9, "transcrib": [9, 11, 12, 14], "sound": [9, 11, 12, 14, 16, 18, 19, 21], "csukuangfj": [9, 11, 12, 14, 16, 18, 19, 21], "tree": [9, 11, 12, 14, 16, 18, 19, 21], "lang_char": [9, 11], "token": [9, 11, 12, 14, 16, 18, 19, 21], "word": [9, 11, 12, 14, 16, 18, 19, 21], "test_wav": [9, 11, 12, 14, 16, 18, 19, 21], "bac009s0764w0121": [9, 11, 12], "wav": [9, 11, 12, 14, 18, 19, 21], "bac009s0764w0122": [9, 11, 12], "bac009s0764w0123": [9, 11, 12], "tran": [9, 12, 14, 16, 18, 19], "graph": [9, 12, 14, 16, 18, 19], "id": [9, 12, 14, 16, 18, 19], "conveni": [9, 12, 14], "eo": [9, 12, 14], "easili": [9, 12, 14], "obtain": [9, 11, 12, 14, 16, 18, 19], "25": [9, 14, 18, 19, 21], "84": 9, "list": [9, 11, 12, 14, 16, 18, 19], "soxi": [9, 11, 12, 14, 16, 21], "sampl": [9, 11, 12, 14, 16, 21], "16000": [9, 11, 12, 14, 16, 18, 19], "precis": [9, 11, 12, 14, 16, 21], "bit": [9, 11, 12, 14, 16, 21], "04": [9, 11, 12, 14, 16, 18, 19], "67263": [9, 11, 12], "315": [9, 11, 12, 14, 16], "295": [9, 11, 12, 14], "cdda": [9, 11, 12, 14, 16, 21], "sector": [9, 11, 12, 14, 16, 21], "135k": [9, 11, 12], "256k": [9, 11, 12, 14], "encod": [9, 11, 12, 14, 16, 21], "sign": [9, 11, 12, 14, 21], "integ": [9, 11, 12, 14, 21], "pcm": [9, 11, 12, 14, 21], "65840": [9, 11, 12], "625": [9, 11, 12], "132k": [9, 11, 12], "64000": [9, 11, 12], "300": [9, 11, 12, 14], "128k": [9, 11, 12, 21], "displai": [9, 11, 12, 14], "support": [9, 11, 14], "three": [9, 11], "topologi": [9, 14], "07": [9, 11, 12, 14], "53": [9, 16, 19], "707": [9, 14], "229": 9, "sample_r": [9, 11, 14, 21], "subsampling_factor": [9, 11, 14], "nhead": 
[9, 11, 14], "attention_dim": [9, 11, 14], "512": [9, 11, 14], "num_decoder_lay": [9, 14], "vgg_frontend": [9, 11, 14], "use_feat_batchnorm": [9, 14], "env_info": [9, 11, 14], "releas": [9, 11, 14], "sha1": [9, 11, 14], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 9, "date": [9, 11, 14], "sun": 9, "sep": 9, "46": [9, 14], "33cfe45": 9, "branch": [9, 11, 14], "d57a873": 9, "dirti": [9, 14], "wed": [9, 11, 14], "nov": [9, 14], "hw": 9, "kangwei": 9, "icefall_aishell3": 9, "k2_releas": 9, "__init__": [9, 11, 14], "tokens_fil": 9, "words_fil": [9, 14, 21], "num_path": [9, 14], "ngram_lm_scal": [9, 14], "attention_decoder_scal": [9, 14], "nbest_scal": [9, 14], "sos_id": [9, 14], "eos_id": [9, 14], "num_class": [9, 14, 21], "4336": [9, 11], "sound_fil": [9, 11, 14, 21], "708": [9, 11, 14, 21], "242": [9, 14], "131": [9, 14], "construct": [9, 11, 12, 14, 16, 18, 19, 21], "134": 9, "269": [9, 18, 19], "275": 9, "241": 9, "293": [9, 14], "704": [9, 18], "369": [9, 14], "\u751a": [9, 11], "\u81f3": [9, 11], "\u51fa": [9, 11], "\u73b0": [9, 11], "\u4ea4": [9, 11], "\u6613": [9, 11], "\u51e0": [9, 11], "\u4e4e": [9, 11], "\u505c": [9, 11], "\u6b62": 9, "\u7684": [9, 11, 12], "\u60c5": [9, 11], "\u51b5": [9, 11], "\u4e00": [9, 11], "\u4e8c": [9, 11], "\u7ebf": [9, 11, 12], "\u57ce": [9, 11], "\u5e02": [9, 11], "\u867d": [9, 11], "\u7136": [9, 11], "\u4e5f": [9, 11, 12], "\u5904": [9, 11], "\u4e8e": [9, 11], "\u8c03": [9, 11], "\u6574": [9, 11], "\u4e2d": [9, 11, 12], "\u4f46": [9, 11, 12], "\u56e0": [9, 11], "\u4e3a": [9, 11], "\u805a": [9, 11], "\u96c6": [9, 11], "\u4e86": [9, 11, 12], "\u8fc7": [9, 11], "\u591a": [9, 11], "\u516c": [9, 11], "\u5171": [9, 11], "\u8d44": [9, 11], "\u6e90": [9, 11], "371": 9, "37": [9, 11, 14, 18], "38": [9, 11, 14, 18], "683": 9, "47": [9, 14], "651": [9, 21], "654": 9, "659": 9, "752": 9, "321": 9, "887": 9, "340": 9, "370": 9, "\u751a\u81f3": [9, 12], "\u51fa\u73b0": [9, 12], "\u4ea4\u6613": [9, 12], "\u51e0\u4e4e": [9, 12], "\u505c\u6b62": 9, 
"\u60c5\u51b5": [9, 12], "\u4e00\u4e8c": [9, 12], "\u57ce\u5e02": [9, 12], "\u867d\u7136": [9, 12], "\u5904\u4e8e": [9, 12], "\u8c03\u6574": [9, 12], "\u56e0\u4e3a": [9, 12], "\u805a\u96c6": [9, 12], "\u8fc7\u591a": [9, 12], "\u516c\u5171": [9, 12], "\u8d44\u6e90": [9, 12], "372": 9, "recor": [9, 14], "highest": [9, 14], "05": [9, 11, 12, 14, 19], "965": 9, "966": 9, "821": 9, "822": 9, "826": 9, "916": 9, "115": [9, 14], "345": 9, "888": 9, "889": 9, "limit": [9, 11, 14], "memori": [9, 11, 14], "upgrad": [9, 14], "pro": [9, 14], "finish": [9, 11, 12, 14, 16, 18, 19, 21], "deploi": [9, 14], "At": [9, 14], "doe": [9, 11, 14, 21], "stream": [9, 14, 18, 19], "home": [9, 14], "checkout": [9, 14], "v2": [9, 14], "cmake": [9, 14], "dcmake_build_typ": [9, 14], "j": [9, 14], "hlg_decod": [9, 14], "four": [9, 14], "ln": [9, 14], "messag": [9, 14], "nn_model": [9, 14], "use_gpu": [9, 14], "word_tabl": [9, 14], "wave": [9, 14], "caution": [9, 14], "relat": [9, 11, 14, 16, 18, 19, 21], "forward": [9, 14], "cpu_jit": [9, 14], "cu": [9, 14], "int": [9, 14], "char": [9, 14], "124": [9, 14], "98": 9, "142": [9, 12, 14], "150": [9, 14], "693": [9, 18], "165": [9, 14], "nnet_output": [9, 14], "182": [9, 16], "180": [9, 14], "489": 9, "45": [9, 11, 14], "216": [9, 14, 18, 19], "mandarin": 10, "corpu": 10, "beij": 10, "shell": 10, "technologi": 10, "ltd": 10, "400": 10, "peopl": 10, "accent": 10, "area": 10, "china": 10, "invit": 10, "particip": 10, "conduct": 10, "quiet": 10, "indoor": 10, "high": 10, "fidel": 10, "microphon": 10, "downsampl": 10, "16khz": 10, "manual": 10, "accuraci": 10, "through": 10, "profession": 10, "annot": 10, "strict": 10, "inspect": 10, "free": 10, "academ": 10, "hope": [10, 14, 16], "moder": 10, "amount": 10, "research": 10, "field": 10, "openslr": 10, "ctc": [10, 13, 15, 17, 20], "stateless": [10, 13], "transduc": [10, 13], "instead": 11, "rnn": 11, "As": [11, 14], "head": 11, "dim": 11, "layer": 11, "feedforward": 11, "2048": 11, "embed": 11, "conv1d": 
11, "kernel": 11, "left": 11, "context": 11, "joiner": 11, "nn": 11, "tanh": 11, "linear": 11, "borrow": 11, "ieeexplor": 11, "ieee": 11, "stamp": 11, "jsp": 11, "arnumb": 11, "9054419": 11, "predict": 11, "modif": 11, "right": 11, "charact": 11, "unit": 11, "whose": [11, 14, 16], "vocabulari": 11, "87939824": 11, "88": 11, "optimized_transduc": 11, "extra": 11, "technqiu": 11, "propos": 11, "improv": 11, "end": [11, 21], "furthermor": 11, "maximum": 11, "emit": 11, "per": 11, "frame": 11, "simplifi": 11, "significantli": 11, "degrad": 11, "perform": 11, "exactli": 11, "benchmark": 11, "unprun": 11, "advantag": 11, "minim": 11, "pruned_transducer_stateless": 11, "prune": 11, "other": [11, 13, 14, 16, 18, 19, 21], "altern": 11, "though": 11, "transducer_stateless_modifi": 11, "option": [11, 16, 18, 19, 21], "pr": 11, "gb": 11, "ram": 11, "small": [11, 18, 19, 21], "tri": 11, "prob": 11, "appli": 11, "configur": [11, 16, 18, 19, 21], "c": [11, 12, 21], "lagz6hrcqxoigbfd5e0y3q": 11, "2022": 11, "03t14": 11, "8477": 11, "greedy_search": 11, "sym": 11, "beam_search": 11, "decoding_method": 11, "beam_4": 11, "02": [11, 14, 19], "28": [11, 14, 16], "ensur": 11, "give": 11, "poor": 11, "531": [11, 12], "994": [11, 14], "176": [11, 14], "027": 11, "encoder_out_dim": 11, "dim_feedforward": 11, "num_encoder_lay": 11, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 11, "thu": [11, 14, 16], "feb": 11, "miss": [11, 14], "50d2281": 11, "mar": 11, "hostnam": 11, "de": 11, "74279": 11, "0815224919": 11, "75d558775b": 11, "mmnv8": 11, "ip": 11, "177": [11, 12, 14], "72": [11, 14], "context_s": 11, "max_sym_per_fram": 11, "blank_id": 11, "vocab_s": 11, "878": [11, 19], "257": [11, 18, 19], "880": 11, "267": [11, 18, 19], "891": 11, "273": 11, "__floordiv__": 11, "length": 11, "x_len": 11, "163": [11, 14], "320": 11, "\u6ede": 11, "322": 11, "285": [11, 14], "759": 11, "760": 11, "919": 11, "922": 11, "046": 11, "047": 11, "319": [11, 14], "214": [11, 14], "215": [11, 14, 16], "402": 11, 
"topk_hyp_index": 11, "topk_index": 11, "logit": 11, "583": [11, 19], "2000": 12, "lji9mwuorlow3jkdhxwk8a": 12, "13t11": 12, "4454": 12, "icefall_asr_aishell_tdnn_lstm_ctc": 12, "858": [12, 14], "389": [12, 14], "154": 12, "161": [12, 14], "536": 12, "171": [12, 14, 18, 19], "539": 12, "917": 12, "207": [12, 14], "129": 12, "\u505c\u6ede": 12, "222": [12, 14], "task": 13, "well": [13, 21], "ligru": [13, 17], "full": 14, "libri": 14, "960": 14, "subset": 14, "3x960": 14, "2880": 14, "lzgnetjwrxc3yghnmd4kpw": 14, "24t16": 14, "43": 14, "4540": 14, "sentenc": 14, "piec": 14, "And": 14, "neither": 14, "nor": 14, "vocab": 14, "work": 14, "5000": 14, "lang_bpe_500": 14, "44": [14, 18, 19], "033": 14, "538": 14, "full_libri": 14, "406": 14, "464": 14, "548": 14, "776": 14, "652": [14, 21], "109226120": 14, "714": [14, 18], "473": 14, "944": 14, "1328": 14, "54": [14, 16, 18, 19], "443": [14, 16], "2563": 14, "56": [14, 18], "494": 14, "592": 14, "331": [14, 16], "1715": 14, "52576": 14, "1424": 14, "807": 14, "506": 14, "808": [14, 18], "522": 14, "362": 14, "203": 14, "565": 14, "1477": 14, "106": 14, "2922": 14, "208": 14, "4295": 14, "52343": 14, "396": 14, "3584": 14, "433": 14, "680": [14, 18], "jit": 14, "bpe": 14, "_pickl": 14, "unpicklingerror": 14, "kei": 14, "hlg_modifi": 14, "g_4_gram": [14, 16, 18, 19], "1089": [14, 16], "134686": [14, 16], "0001": [14, 16], "1221": [14, 16], "135766": [14, 16], "0002": [14, 16], "reproduc": 14, "our": 14, "106000": [14, 16], "496": [14, 16], "875": [14, 16], "212k": 14, "267440": [14, 16], "1253": [14, 16], "535k": 14, "83": [14, 16], "77200": [14, 16], "361": [14, 16], "154k": 14, "554": 14, "260": 14, "bpe_model": 14, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 14, "tue": 14, "oct": 14, "22": [14, 18, 19, 21], "8d93169": 14, "266": [14, 16], "268": [14, 16], "600": 14, "601": 14, "758": 14, "025": 14, "204": 14, "425": 14, "earli": [14, 16], "nightfal": [14, 16], "THE": [14, 16], "yellow": [14, 16], "lamp": [14, 16], 
"light": [14, 16], "AND": [14, 16], "THERE": [14, 16], "squalid": [14, 16], "quarter": [14, 16], "OF": [14, 16], "broffel": 14, "god": [14, 16], "AS": [14, 16], "direct": [14, 16], "consequ": [14, 16], "sin": [14, 16], "man": [14, 16], "punish": [14, 16], "had": [14, 16], "her": [14, 16], "love": [14, 16], "child": [14, 16], "ON": [14, 16], "THAT": [14, 16], "dishonor": [14, 16], "osom": 14, "TO": [14, 16], "parent": [14, 16], "forev": [14, 16], "WITH": [14, 16], "race": [14, 16], "descent": [14, 16], "mortal": [14, 16], "BE": [14, 16], "bless": [14, 16], "soul": [14, 16], "IN": [14, 16], "heaven": [14, 16], "yet": [14, 16], "THESE": [14, 16], "thought": [14, 16], "affect": [14, 16], "hester": [14, 16], "prynn": [14, 16], "less": [14, 16, 21], "apprehens": [14, 16], "427": 14, "723": 14, "775": 14, "881": 14, "951": 14, "352": 14, "234": 14, "384": 14, "brothel": [14, 16], "bosom": [14, 16], "whole": [14, 16, 18, 19], "ngram": [14, 16, 18, 19], "Its": 14, "857": 14, "979": 14, "980": 14, "055": 14, "117": 14, "051": 14, "363": 14, "959": [14, 19], "546": 14, "598": 14, "599": [14, 16], "833": 14, "834": 14, "915": 14, "076": 14, "110": 14, "329": 14, "397": 14, "999": 14, "concaten": 14, "bucket": 14, "sampler": 14, "1000": 14, "ctc_decod": 14, "ngram_lm_rescor": 14, "attention_rescor": 14, "kind": 14, "316": 14, "118": 14, "58": 14, "221": 14, "125": [14, 21], "136": 14, "144": 14, "159": [14, 21], "543": 14, "174": 14, "topo": 14, "547": 14, "729": 14, "111": 14, "702": 14, "703": 14, "545": 14, "122": 14, "280": 14, "135": [14, 21], "153": [14, 21], "945": 14, "475": 14, "191": [14, 18, 19], "398": 14, "199": [14, 16], "515": 14, "205": 14, "w": [14, 18, 19], "deseri": 14, "441": 14, "fsaclass": 14, "loadfsa": 14, "const": 14, "string": 14, "c10": 14, "ignor": 14, "non": 14, "attribut": 14, "dummi": 14, "589": 14, "attention_scal": 14, "656": 14, "162": 14, "169": [14, 18, 19], "188": 14, "624": 14, "519": [14, 19], "632": 14, "645": [14, 21], "243": 14, "970": 
14, "303": 14, "179": 14, "icefall_asr_librispeech_tdnn": 16, "lstm_ctc": 16, "flac": 16, "116k": 16, "140k": 16, "343k": 16, "164k": 16, "105k": 16, "174k": 16, "usag": [16, 18, 19, 21], "pretraind": 16, "168": 16, "170": 16, "581": 16, "584": [16, 19], "209": 16, "791": 16, "245": 16, "099": 16, "methond": [16, 18, 19], "725": 16, "403": 16, "631": 16, "010": 16, "tdnn_ligru_ctc": 18, "enough": [18, 19, 21], "luomingshuang": [18, 19], "icefall_asr_timit_tdnn_ligru_ctc": 18, "pretrained_average_9_25": 18, "fdhc0_si1559": [18, 19], "felc0_si756": [18, 19], "fmgd0_si1564": [18, 19], "ffprobe": [18, 19], "show_format": [18, 19], "nistspher": [18, 19], "database_id": [18, 19], "database_vers": [18, 19], "utterance_id": [18, 19], "dhc0_si1559": [18, 19], "sample_min": [18, 19], "4176": [18, 19], "sample_max": [18, 19], "5984": [18, 19], "bitrat": [18, 19], "258": [18, 19], "audio": [18, 19], "pcm_s16le": [18, 19], "hz": [18, 19], "s16": [18, 19], "256": [18, 19], "elc0_si756": [18, 19], "1546": [18, 19], "1989": [18, 19], "mgd0_si1564": [18, 19], "7626": [18, 19], "10573": [18, 19], "660": 18, "183": [18, 19], "695": 18, "697": 18, "210": [18, 19], "829": 18, "sil": [18, 19], "dh": [18, 19], "ih": [18, 19], "uw": [18, 19], "ah": [18, 19], "ii": [18, 19], "z": [18, 19], "aa": [18, 19], "ei": [18, 19], "dx": [18, 19], "uh": [18, 19], "ng": [18, 19], "th": [18, 19], "eh": [18, 19], "jh": [18, 19], "er": [18, 19], "ai": [18, 19], "hh": [18, 19], "aw": 18, "ae": [18, 19], "705": 18, "715": 18, "720": 18, "251": [18, 19], "348": 18, "ch": 18, "icefall_asr_timit_tdnn_lstm_ctc": 19, "pretrained_average_16_25": 19, "816": 19, "827": 19, "387": 19, "unk": 19, "739": 19, "971": 19, "977": 19, "978": 19, "981": 19, "ow": 19, "ykubhb5wrmosxykid1z9eg": 21, "23t23": 21, "sinc": 21, "icefall_asr_yesno_tdnn": 21, "l_disambig": 21, "lexicon_disambig": 21, "arpa": 21, "0_0_0_1_0_0_0_1": 21, "0_0_1_0_0_0_1_0": 21, "0_0_1_0_0_1_1_1": 21, "0_0_1_0_1_0_0_1": 21, "0_0_1_1_0_0_0_1": 21, 
"0_0_1_1_0_1_1_0": 21, "0_0_1_1_1_0_0_0": 21, "0_0_1_1_1_1_0_0": 21, "0_1_0_0_0_1_0_0": 21, "0_1_0_0_1_0_1_0": 21, "0_1_0_1_0_0_0_0": 21, "0_1_0_1_1_1_0_0": 21, "0_1_1_0_0_1_1_1": 21, "0_1_1_1_0_0_1_0": 21, "0_1_1_1_1_0_1_0": 21, "1_0_0_0_0_0_0_0": 21, "1_0_0_0_0_0_1_1": 21, "1_0_0_1_0_1_1_1": 21, "1_0_1_1_0_1_1_1": 21, "1_0_1_1_1_1_0_1": 21, "1_1_0_0_0_1_1_1": 21, "1_1_0_0_1_0_1_1": 21, "1_1_0_1_0_1_0_0": 21, "1_1_0_1_1_0_0_1": 21, "1_1_0_1_1_1_1_0": 21, "1_1_1_0_0_1_0_1": 21, "1_1_1_0_1_0_1_0": 21, "1_1_1_1_0_0_1_0": 21, "1_1_1_1_1_0_0_0": 21, "1_1_1_1_1_1_1_1": 21, "54080": 21, "507": 21, "108k": 21, "No": 21, "ye": 21, "hebrew": 21, "NO": 21, "621": 21, "119": 21, "127": 21, "650": 21, "139": 21, "143": 21, "198": 21, "181": 21, "186": 21, "187": 21, "287": 21, "correctli": 21, "simplest": 21}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": 2, "creat": [2, 8], "recip": [2, 13], "data": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "prepar": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "train": [2, 5, 8, 9, 11, 12, 14, 16, 18, 19, 21], "decod": [2, 8, 9, 11, 12, 14, 16, 18, 19, 21], "pre": [2, 5, 9, 11, 12, 14, 16, 18, 19, 21], "model": [2, 5, 9, 11, 12, 14, 16, 18, 19, 21], "huggingfac": [4, 6], "space": 6, "youtub": [6, 8], "video": [6, 8], "icefal": [7, 8], "content": [7, 13], "instal": [8, 9, 11, 12, 14, 16, 18, 19], "0": 8, "pytorch": 8, "torchaudio": 8, "1": [8, 9, 11, 12, 14], "k2": 8, "2": [8, 9, 11, 12, 14], "lhots": 8, "3": [8, 9, 11, 14], "download": [8, 9, 11, 12, 14, 16, 18, 19, 21], "exampl": [8, 9, 11, 12, 14], "virtual": 8, "environ": 8, "activ": 8, "your": 8, "4": 8, "5": 8, "test": 8, "conform": [9, 14], "ctc": [9, 12, 14, 16, 18, 19, 21], "configur": [9, 12, 14], "option": [9, 12, 14], "log": [9, 11, 12, 14], "usag": [9, 11, 12, 14], "case": [9, 11, 12, 14], "kaldifeat": [9, 11, 12, 14, 16, 18, 19, 21], "hlg": [9, 12, 14], "attent": [9, 14], "rescor": [9, 14], 
"colab": [9, 11, 12, 14, 16, 18, 19, 21], "notebook": [9, 11, 12, 14, 16, 18, 19, 21], "deploy": [9, 14], "c": [9, 14], "aishel": 10, "stateless": 11, "transduc": 11, "The": 11, "loss": 11, "todo": 11, "greedi": 11, "search": 11, "beam": 11, "modifi": 11, "tdnn": [12, 16, 18, 19, 21], "lstm": [12, 16, 19], "tabl": 13, "lm": 14, "comput": 14, "wer": 14, "librispeech": 15, "infer": [16, 18, 19, 21], "timit": 17, "ligru": 18, "yesno": 20}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 56}})
\ No newline at end of file
+Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "recipes/aishell/conformer_ctc", "recipes/aishell/index", "recipes/aishell/stateless_transducer", "recipes/aishell/tdnn_lstm_ctc", "recipes/index", "recipes/librispeech/conformer_ctc", "recipes/librispeech/index", "recipes/librispeech/lstm_pruned_stateless_transducer", "recipes/librispeech/tdnn_lstm_ctc", "recipes/timit/index", "recipes/timit/tdnn_ligru_ctc", "recipes/timit/tdnn_lstm_ctc", "recipes/yesno/index", "recipes/yesno/tdnn"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "recipes/aishell/conformer_ctc.rst", "recipes/aishell/index.rst", "recipes/aishell/stateless_transducer.rst", "recipes/aishell/tdnn_lstm_ctc.rst", "recipes/index.rst", "recipes/librispeech/conformer_ctc.rst", "recipes/librispeech/index.rst", "recipes/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/librispeech/tdnn_lstm_ctc.rst", "recipes/timit/index.rst", "recipes/timit/tdnn_ligru_ctc.rst", "recipes/timit/tdnn_lstm_ctc.rst", "recipes/yesno/index.rst", "recipes/yesno/tdnn.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Recipes", "Conformer CTC", "LibriSpeech", "Transducer", "TDNN-LSTM-CTC", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC"], "terms": {"we": [0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 20, 22], "us": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 14, 17, 19, 20, 22], 
"tool": [0, 16], "make": [0, 1, 3, 9, 11, 14, 16], "consist": [0, 11, 16], "possibl": [0, 2, 3, 8, 9, 14], "black": 0, "format": [0, 9, 11, 12, 14, 16, 17, 19, 20, 22], "flake8": 0, "check": [0, 14], "qualiti": [0, 10], "isort": 0, "sort": [0, 8], "import": 0, "The": [0, 1, 2, 6, 8, 9, 10, 12, 14, 16, 17, 19, 20, 22], "version": [0, 8, 9, 11, 12, 14, 16, 17, 19, 20], "abov": [0, 8, 9, 10, 11, 12, 14, 16, 22], "ar": [0, 1, 3, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 20, 22], "12": [0, 8, 9, 11, 12, 14, 16, 19, 22], "6b0": 0, "3": [0, 7, 12, 16, 17, 22], "9": [0, 8, 9, 11, 12, 14, 16, 17, 19, 22], "2": [0, 7, 16, 17, 19, 20, 22], "5": [0, 9, 11, 12, 14, 16, 17, 19, 20, 22], "after": [0, 1, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "run": [0, 2, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "command": [0, 1, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "git": [0, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "clone": [0, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "http": [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 14, 16, 17, 19, 20, 22], "github": [0, 2, 5, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "com": [0, 2, 5, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "k2": [0, 2, 5, 6, 7, 9, 11, 12, 14, 16, 17, 19, 20], "fsa": [0, 2, 5, 6, 8, 9, 11, 14, 16], "icefal": [0, 2, 3, 5, 6, 9, 11, 12, 13, 14, 16, 17, 19, 20, 22], "cd": [0, 1, 2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "pip": [0, 1, 8, 11, 16], "instal": [0, 1, 4, 6, 7, 16, 22], "pre": [0, 3, 4, 6, 7, 8], "commit": 0, "whenev": 0, "you": [0, 1, 2, 5, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "automat": [0, 6], "hook": 0, "invok": 0, "fail": [0, 8], "If": [0, 2, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "ani": [0, 8, 9, 11, 12, 14, 16, 22], "your": [0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 16, 17, 19, 20, 22], "wa": [0, 8, 14, 17], "success": [0, 8], "pleas": [0, 1, 2, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "fix": [0, 8, 14], "issu": [0, 8, 14], "report": [0, 8], "some": [0, 1, 9, 11, 12, 14, 16, 17, 19, 20, 22], "i": [0, 1, 2, 6, 8, 9, 10, 11, 12, 14, 16, 17, 
19, 20, 22], "e": [0, 2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "modifi": [0, 9, 12, 14, 16, 17, 19, 20, 22], "file": [0, 2, 6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "place": [0, 8, 11, 14, 17], "so": [0, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "statu": 0, "failur": 0, "see": [0, 1, 6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "which": [0, 2, 6, 9, 10, 11, 12, 14, 17, 19, 20, 22], "ha": [0, 2, 9, 11, 12, 14, 16, 17, 19, 20], "been": [0, 11], "befor": [0, 1, 9, 11, 12, 14, 16], "further": 0, "chang": [0, 9, 11, 12, 14, 16, 17, 19, 20, 22], "all": [0, 5, 6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "again": [0, 22], "should": [0, 2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "succe": 0, "thi": [0, 2, 3, 4, 8, 9, 11, 12, 13, 14, 16, 17, 19, 20, 22], "time": [0, 9, 11, 12, 14, 16, 17, 19, 20, 22], "succeed": 0, "want": [0, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "can": [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 14, 16, 17, 19, 20, 22], "do": [0, 2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "21": [0, 8, 9, 11, 14, 19, 20], "your_changed_fil": 0, "py": [0, 2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 5, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "prepar": [1, 3], "environ": [1, 9, 10, 11, 12, 14, 16, 17, 19, 20, 22], "doc": 1, "r": [1, 8, 19, 20], "requir": [1, 8], "txt": [1, 8, 9, 11, 12, 14, 17, 19, 20, 22], "set": [1, 8, 9, 11, 12, 14, 16, 22], "up": [1, 8, 9, 12, 14, 17], "readi": [1, 9, 14], "refer": [1, 2, 8, 9, 11, 12, 14, 17, 19, 20, 22], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 8, 9, 11, 14, 16], "local": [1, 8, 16], "preview": 1, "what": [1, 2, 8, 11], "look": [1, 2, 5, 8, 9, 11, 12, 14], "like": [1, 2, 6, 8, 9, 11, 12, 14, 16, 22], "publish": [1, 10], "html": [1, 2, 8, 16], "gener": [1, 9, 11, 12, 14, 16], "view": [1, 9, 11, 12, 14, 16, 22], "follow": [1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "python3": [1, 8, 16], "m": [1, 11, 16, 19, 20], "server": [1, 6, 8], "It": [1, 2, 4, 8, 9, 10, 11, 12, 14, 16, 17, 19, 20, 
22], "print": [1, 9, 11, 12, 14, 16, 17, 19, 20, 22], "serv": [1, 16], "0": [1, 7, 9, 11, 12, 14, 16, 17, 19, 20, 22], "port": [1, 16], "8000": [1, 22], "open": [1, 10, 11, 14], "browser": [1, 4, 6, 16], "go": [1, 9, 11, 14, 16], "read": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "code": [2, 3, 7, 9, 14, 17, 19, 20, 22], "style": [2, 3, 7], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 8, 9, 11, 14, 16], "recommend": [2, 8, 9, 11, 12, 14], "test": [2, 7, 9, 11, 12, 14, 17, 19, 20], "valid": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "dataset": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "lhots": [2, 7, 9, 11, 14], "readthedoc": [2, 8], "io": [2, 8, 16], "en": [2, 8], "latest": [2, 6, 8, 14, 16, 17], "index": [2, 8, 16], "yesno": [2, 7, 8, 13, 22], "veri": [2, 3, 11, 19, 20, 22], "good": 2, "exampl": [2, 6, 7, 17, 19, 20, 22], "speech": [2, 6, 7, 8, 10, 11, 13, 22], "pull": [2, 9, 11, 14], "380": [2, 20], "show": [2, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "add": [2, 9, 11, 12, 13], "new": [2, 3, 6, 8, 9, 10, 11, 12, 14, 16, 17, 22], "suppos": 2, "would": [2, 8, 14, 17], "name": [2, 9, 11], "foo": [2, 9, 14, 16], "eg": [2, 5, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "mkdir": [2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "p": [2, 8, 11, 19, 20], "asr": [2, 5, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "touch": 2, "sh": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "chmod": 2, "x": 2, "simpl": [2, 11], "own": 2, "otherwis": [2, 9, 11, 14, 16], "librispeech": [2, 5, 7, 13, 14, 16, 17], "assum": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "fanci": 2, "call": 2, "bar": [2, 9, 14, 16], "organ": 2, "wai": [2, 3, 16], "readm": [2, 9, 11, 12, 14, 17, 19, 20, 22], "md": [2, 5, 9, 11, 12, 14, 16, 17, 19, 20, 22], "asr_datamodul": [2, 8], "pretrain": [2, 9, 11, 12, 14, 17, 19, 20, 22], "For": [2, 5, 9, 11, 12, 14, 16, 17, 19, 20, 22], "instanc": [2, 5, 9, 11, 12, 14, 16, 17, 19, 20, 22], "tdnn": [2, 8, 10, 13, 15, 18, 21], "its": [2, 11], "directori": [2, 8, 9, 11, 12, 14, 16, 17, 19, 
20, 22], "structur": 2, "descript": [2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "contain": [2, 7, 9, 11, 12, 13, 14, 16, 17, 19, 20, 22], "inform": [2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "g": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "wer": [2, 8, 16, 17, 19, 20, 22], "etc": [2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "provid": [2, 6, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 20, 22], "pytorch": [2, 7, 11], "dataload": [2, 8], "take": [2, 22], "input": [2, 9, 11, 12, 14, 17, 19, 20, 22], "checkpoint": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "save": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "dure": [2, 6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "stage": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "definit": 2, "neural": [2, 9, 14], "network": [2, 9, 11, 14, 16], "script": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "infer": 2, "tdnn_lstm_ctc": [2, 12, 17, 20], "conformer_ctc": [2, 9, 14], "get": [2, 6, 8, 9, 11, 12, 14, 16, 17, 22], "feel": [2, 16], "result": [2, 5, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "everi": [2, 16], "kept": 2, "self": 2, "toler": 2, "duplic": 2, "among": [2, 8], "differ": [2, 8, 9, 10, 14, 16], "invoc": 2, "help": [2, 9, 11, 12, 14, 16, 17, 19, 20, 22], "blob": [2, 5, 16], "master": [2, 5, 11, 16], "transform": [2, 9, 14, 16], "conform": [2, 10, 11, 13, 15, 16], "base": [2, 9, 11, 12, 14, 16], "lstm": [2, 10, 13, 15, 16, 18], "attent": [2, 11, 12], "lm": [2, 8, 11, 17, 19, 20, 22], "rescor": [2, 12, 17, 19, 20, 22], "demonstr": [2, 4, 6], "consid": 2, "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 9, 11, 12, 14, 16], "mani": 3, "two": [3, 9, 11, 12, 14, 16, 17, 19, 20, 22], "them": [3, 4, 5, 6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "To": [3, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "document": [3, 7], "repositori": 3, "recip": [3, 5, 7, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "In": [3, 6, 9, 11, 12, 14, 17, 19, 20, 22], "page": [3, 6, 9, 11, 12, 13, 14, 16, 17, 19, 20, 22], "describ": [3, 4, 9, 11, 12, 
14, 17, 19, 20], "how": [3, 4, 6, 7, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "creat": [3, 7, 9, 11, 12, 14, 16, 17, 19, 20, 22], "data": [3, 10], "train": [3, 4, 6, 7], "decod": [3, 6], "model": [3, 4, 6, 7, 8], "section": [4, 8, 9, 14], "find": [4, 5, 6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "also": [4, 5, 8, 9, 11, 12, 14, 16, 22], "try": [4, 6, 16], "from": [4, 6, 8, 9, 10, 11, 12, 14, 16, 17, 19, 20, 22], "within": [4, 6], "without": [4, 6, 9, 14], "anyth": [4, 6], "space": [4, 7], "youtub": [4, 7, 14, 16, 17], "video": [4, 7, 14, 16, 17], "upload": [5, 6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "huggingfac": [5, 7, 9, 11, 12, 14, 16, 17, 19, 20, 22], "co": [5, 6, 9, 10, 11, 12, 14, 16, 17, 19, 20, 22], "visit": [5, 6, 16], "link": [5, 8, 16], "search": [5, 6], "specif": [5, 11], "correspond": [5, 6], "aishel": [5, 7, 9, 11, 12, 13], "gigaspeech": [5, 16], "wenetspeech": 5, "integr": 6, "framework": 6, "sherpa": [6, 16], "need": [6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "download": [6, 7, 10], "window": [6, 16], "maco": [6, 16], "linux": [6, 16], "even": [6, 8], "ipad": 6, "phone": 6, "start": [6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "address": [6, 11, 16], "recognit": [6, 7, 10, 11, 13, 22], "screenshot": [6, 9, 11, 12, 14, 16, 22], "select": [6, 16, 17, 19, 20, 22], "languag": [6, 9, 11, 12], "current": [6, 8, 11, 13, 16], "chines": [6, 10, 11], "english": [6, 22], "target": 6, "method": [6, 8, 9, 11, 12, 14, 16, 17, 19, 20], "greedi": 6, "modified_beam_search": [6, 11, 16], "choos": [6, 8, 16], "number": [6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "activ": 6, "path": [6, 9, 11, 12, 14, 16], "either": [6, 9, 11, 12, 14], "record": [6, 9, 10, 11, 12, 14, 16, 17, 19, 20, 22], "click": [6, 8, 9, 11, 12, 14, 16, 22], "button": 6, "submit": 6, "wait": 6, "moment": 6, "an": [6, 8, 9, 10, 11, 14, 16, 22], "when": [6, 11, 14], "bottom": [6, 16], "part": [6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "tabl": 6, "one": [6, 9, 11, 12, 14, 16, 17, 19, 20, 22], "next": [6, 8, 14, 
16, 17], "gen": [6, 8, 14, 16, 17], "kaldi": [6, 8, 14, 16, 17], "subscrib": [6, 8, 14, 16, 17], "channel": [6, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "nadira": [6, 8, 14, 16, 17], "povei": [6, 8, 14, 16, 17], "www": [6, 8, 10, 14, 16, 17], "uc_vaumpkminz1pnkfxan9mw": [6, 8, 14, 16, 17], "torchaudio": 7, "1": [7, 16, 17, 19, 20, 22], "timit": [7, 13, 19, 20], "contribut": 7, "depend": [8, 9, 14], "step": [8, 9, 11, 12, 14, 16, 22], "order": [8, 9, 12, 14, 17, 19, 20], "matter": 8, "org": [8, 10, 11, 16], "least": 8, "v1": [8, 9, 12, 14, 17, 19, 20], "alreadi": 8, "don": [8, 9, 12, 14, 16, 17, 19, 20, 22], "t": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "replac": 8, "compil": [8, 9, 11, 14], "against": 8, "strongli": 8, "collect": 8, "variabl": [8, 9, 12, 14, 16], "pythonpath": 8, "point": [8, 9, 12, 14, 16], "folder": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "tmp": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "setup": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "export": [8, 9, 11, 12, 14, 17, 19, 20, 22], "put": 8, "sever": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "same": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "switch": [8, 9, 14], "just": 8, "about": [8, 11, 16], "virtualenv": 8, "8": [8, 9, 11, 14, 16, 17, 22], "cpython3": 8, "6": [8, 9, 11, 14, 16, 17, 19, 20], "final": [8, 14, 17], "64": [8, 11], "1540m": 8, "creator": 8, "cpython3posix": 8, "dest": 8, "ceph": [8, 9, 11, 14], "fj": [8, 11, 14], "fangjun": [8, 11, 14], "clear": 8, "fals": [8, 9, 11, 14], "no_vcs_ignor": 8, "global": 8, "seeder": 8, "fromappdata": 8, "bundl": 8, "setuptool": 8, "wheel": 8, "via": 8, "copi": 8, "app_data_dir": 8, "root": 8, "share": 8, "v": [8, 14, 19, 20], "irtualenv": 8, "ad": [8, 9, 11, 12, 14, 16, 22], "seed": 8, "packag": 8, "57": [8, 14, 17], "36": [8, 11, 14], "bashactiv": 8, "cshellactiv": 8, "fishactiv": 8, "powershellactiv": 8, "pythonactiv": 8, "xonshactiv": 8, "sourc": [8, 9, 10, 11, 14], "bin": [8, 9, 14, 16], "dev20210822": 8, "cpu": [8, 9, 22], "torch1": 8, "f": [8, 19, 20], 
"nightli": 8, "whl": [8, 16], "2bcpu": 8, "cp38": 8, "linux_x86_64": 8, "mb": 8, "________________________________": 8, "185": [8, 9, 14, 22], "kb": [8, 19, 20], "graphviz": 8, "17": [8, 9, 14, 16, 19, 20], "py3": 8, "none": [8, 9, 14], "18": [8, 9, 11, 12, 14, 16, 17, 19, 20], "torch": [8, 9, 11, 14], "cach": 8, "manylinux1_x86_64": 8, "831": [8, 11, 20], "type": [8, 9, 11, 14, 16, 22], "extens": 8, "typing_extens": 8, "10": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "26": [8, 11, 14, 20], "successfulli": 8, "probabl": [8, 11, 16], "cuda": [8, 9, 11, 12, 14, 16, 17, 19, 20], "req": 8, "7b1b76ge": 8, "q": 8, "audioread": 8, "soundfil": 8, "post1": 8, "py2": 8, "7": [8, 9, 12, 14, 16, 17, 19, 20], "97": [8, 9], "cytoolz": 8, "11": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "manylinux_2_17_x86_64": 8, "manylinux2014_x86_64": 8, "dataclass": 8, "14": [8, 9, 14, 16, 17, 19], "h5py": 8, "manylinux_2_12_x86_64": 8, "manylinux2010_x86_64": 8, "684": [8, 9, 22], "intervaltre": 8, "lilcom": 8, "numpi": 8, "15": [8, 11, 12, 14, 19, 22], "40": [8, 12, 14, 17, 19, 20], "pyyaml": 8, "662": 8, "tqdm": 8, "62": [8, 14, 17], "76": [8, 22], "73": 8, "satisfi": 8, "lib": 8, "site": 8, "dev": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "2a1410b": 8, "clean": [8, 9, 11, 14, 16, 17], "toolz": 8, "55": [8, 12, 14, 19], "sortedcontain": 8, "29": [8, 9, 11, 12, 14, 17, 19, 20], "cffi": 8, "411": [8, 14], "pycpars": 8, "20": [8, 9, 11, 12, 14, 17, 19, 20, 22], "112": 8, "pypars": 8, "67": 8, "done": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "filenam": [8, 16], "dev_2a1410b_clean": 8, "size": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "342242": 8, "sha256": 8, "f683444afa4dc0881133206b4646a": 8, "9d0f774224cc84000f55d0a67f6e4a37997": 8, "store": [8, 14], "ephem": 8, "ftu0qysz": 8, "7f": 8, "7a": 8, "8e": 8, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 8, "warn": 8, "built": 8, "invalid": [8, 14], "metadata": [8, 19, 20], "mandat": 8, "pep": 8, "440": 8, "packa": 8, "ging": 8, "deprec": [8, 11], 
"legaci": 8, "becaus": 8, "could": [8, 9, 12], "A": [8, 9, 11, 12, 14, 16, 17], "discuss": 8, "regard": 8, "pypa": 8, "sue": 8, "8368": 8, "inter": 8, "valtre": 8, "sor": 8, "tedcontain": 8, "remot": 8, "enumer": 8, "object": [8, 9, 11, 12, 16, 22], "500": [8, 11, 14, 16], "count": 8, "100": [8, 9, 11, 12, 14, 16], "compress": 8, "308": [8, 9, 11, 12], "total": [8, 9, 11, 12, 14, 16, 17, 22], "delta": 8, "263": 8, "reus": 8, "307": 8, "102": [8, 9], "pack": 8, "receiv": 8, "172": 8, "49": [8, 14, 20, 22], "kib": 8, "385": 8, "00": [8, 9, 11, 12, 14, 17, 19, 20, 22], "resolv": 8, "kaldilm": 8, "tar": 8, "gz": 8, "48": [8, 9, 11], "574": 8, "kaldialign": 8, "sentencepiec": [8, 14], "96": 8, "tensorboard": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "41": [8, 9, 11, 19, 22], "line": 8, "absl": 8, "absl_pi": 8, "13": [8, 11, 12, 14, 17, 19], "132": 8, "googl": [8, 16], "auth": 8, "oauthlib": 8, "google_auth_oauthlib": 8, "grpcio": 8, "24": [8, 12, 17, 19, 20, 22], "39": [8, 11, 14, 17, 19], "ment": 8, "requi": 8, "rement": 8, "protobuf": 8, "manylinux_2_5_x86_64": 8, "werkzeug": 8, "288": 8, "tensorboard_data_serv": 8, "google_auth": 8, "35": [8, 11, 14, 16], "152": 8, "request": 8, "plugin": 8, "wit": 8, "tensorboard_plugin_wit": 8, "781": 8, "markdown": 8, "six": 8, "16": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "cachetool": 8, "rsa": 8, "34": 8, "pyasn1": 8, "modul": 8, "pyasn1_modul": 8, "155": 8, "requests_oauthlib": 8, "23": [8, 9, 11, 12, 14, 19, 20, 22], "77": [8, 14], "urllib3": 8, "27": [8, 9, 11, 17, 20], "138": [8, 9, 11], "certifi": 8, "2017": 8, "2021": [8, 9, 12, 14, 17, 19, 20, 22], "30": [8, 9, 11, 12, 14, 16, 22], "145": 8, "charset": 8, "normal": [8, 17, 19, 20, 22], "charset_norm": 8, "idna": 8, "59": [8, 12, 14], "146": 8, "897233": 8, "eccb906cafcd45bf9a7e1a1718e4534254bfb": 8, "f4c0d0cbc66eee6c88d68a63862": 8, "85": 8, "7d": 8, "63": [8, 11], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 8, "etool": 8, "oaut": 8, "hlib": 8, "let": [8, 9, 14, 
16], "u": [8, 9, 11, 12, 14, 16, 22], "log": [8, 17, 19, 20, 22], "08": [8, 14, 16, 17, 19, 20, 22], "19": [8, 9, 14, 17, 19, 20], "main": [8, 9, 14], "dl_dir": [8, 9, 12, 14, 16], "waves_yesno": 8, "49mb": 8, "03": [8, 11, 14, 16, 19, 20], "39mb": 8, "manifest": 8, "31": [8, 14], "42": [8, 9, 14, 22], "comput": [8, 9, 11, 12, 16, 17, 19, 20, 22], "fbank": [8, 9, 11, 12, 14, 17, 19, 20, 22], "32": [8, 9, 11, 12], "803": 8, "info": [8, 9, 11, 12, 14, 17, 19, 20, 22], "compute_fbank_yesno": 8, "52": [8, 9, 14], "process": [8, 9, 11, 12, 14, 16], "extract": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "featur": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "_______________________________________________________________": 8, "90": 8, "01": [8, 11, 12, 14], "80": [8, 9, 11, 14], "57it": 8, "085": 8, "______________________________________________________________": 8, "248": [8, 11], "21it": 8, "lang": [8, 11, 14], "fcordre9": 8, "kaldilm_6899d26f2d684ad48f21025950cd2866": 8, "csrc": [8, 14], "arpa_file_pars": 8, "cc": 8, "void": 8, "arpafilepars": 8, "rea": 8, "d": [8, 19, 20], "std": 8, "istream": 8, "79": 8, "140": [8, 12], "gram": [8, 9, 11, 12, 14, 17, 19, 20], "89": [8, 9], "hlg": [8, 17, 19, 20, 22], "928": 8, "compile_hlg": 8, "120": 8, "lang_phon": [8, 12, 17, 19, 20, 22], "929": [8, 11], "lexicon": [8, 9, 11, 12, 14, 16, 22], "116": 8, "convert": [8, 14, 16], "l": [8, 11, 16, 19, 20, 22], "pt": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "linv": [8, 11, 14, 22], "931": 8, "ctc_topo": 8, "max_token_id": 8, "932": 8, "load": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "fst": [8, 11, 22], "intersect": 8, "933": 8, "lg": 8, "shape": 8, "66": 8, "connect": [8, 14, 16, 17], "68": [8, 14], "70": 8, "class": [8, 14], "tensor": [8, 9, 11, 12, 14, 16, 22], "71": [8, 14, 17], "determin": 8, "934": 8, "74": 8, "_k2": 8, "raggedint": 8, "remov": [8, 9, 11, 12, 14, 17, 19, 20], "disambigu": 8, "symbol": [8, 11], "87": 8, "remove_epsilon": 8, "935": 8, "92": [8, 14], "arc": 8, "95": [8, 10], 
"compos": 8, "h": 8, "105": [8, 14], "936": 8, "107": [8, 17], "123": 8, "now": [8, 9, 14, 16, 17, 19, 20], "cuda_visible_devic": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "gpu": [8, 9, 11, 12, 14, 16, 19, 20, 22], "avail": [8, 9, 11, 14, 16, 17, 19, 20, 22], "given": [8, 9, 11, 12, 14, 17], "below": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "072": 8, "465": 8, "466": 8, "exp_dir": [8, 11, 14], "posixpath": [8, 11, 14], "exp": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "lang_dir": [8, 11, 14], "lr": [8, 11, 16], "feature_dim": [8, 9, 11, 14, 22], "weight_decai": 8, "1e": 8, "06": [8, 12, 14, 17, 22], "start_epoch": 8, "best_train_loss": 8, "inf": 8, "best_valid_loss": 8, "best_train_epoch": 8, "best_valid_epoch": 8, "batch_idx_train": 8, "log_interv": 8, "valid_interv": 8, "beam_siz": [8, 11], "reduct": 8, "sum": 8, "use_doub": 8, "le_scor": 8, "true": [8, 9, 11, 14, 16, 17, 19, 20, 22], "world_siz": 8, "master_port": 8, "12354": 8, "num_epoch": 8, "feature_dir": [8, 14], "max_dur": [8, 14], "bucketing_sampl": [8, 14], "num_bucket": [8, 14], "concatenate_cut": [8, 14], "duration_factor": [8, 14], "gap": [8, 14], "on_the_fly_feat": [8, 14], "shuffl": [8, 14], "return_cut": [8, 14], "num_work": [8, 14], "074": 8, "113": [8, 11, 14], "098": [8, 17], "cut": [8, 14], "240": [8, 9, 22], "149": [8, 14], "200": [8, 9, 14, 19, 20, 22], "singlecutsampl": 8, "206": [8, 14], "219": [8, 11, 14], "246": [8, 11, 14, 19, 20], "357": 8, "416": 8, "epoch": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "batch": [8, 9, 11, 12, 14, 16], "avg": [8, 11, 14, 16, 17, 19, 20, 22], "loss": [8, 9, 12, 14, 16, 17, 19, 20, 22], "0789": 8, "848": 8, "5356": 8, "7556": 8, "301": [8, 14], "432": [8, 14], "9972": 8, "best": [8, 9, 12, 14], "805": 8, "2436": 8, "5717": 8, "33": [8, 9, 10, 11, 14, 19], "109": [8, 9, 14], "4167": 8, "121": [8, 17], "325": 8, "2214": 8, "798": [8, 11], "0781": 8, "1343": 8, "065": 8, "0859": 8, "556": 8, "0421": 8, "0975": 8, "810": 8, "0431": 8, "824": 8, "657": 8, "0109": 8, 
"984": [8, 14], "0093": 8, "0096": 8, "50": [8, 14, 16, 19], "239": [8, 11], "0104": 8, "0101": 8, "569": 8, "0092": 8, "819": [8, 19], "835": 8, "51": [8, 9, 14, 22], "024": 8, "0105": 8, "317": 8, "0099": 8, "0097": 8, "552": 8, "0108": 8, "869": 8, "0102": 8, "126": [8, 14], "128": [8, 14], "537": [8, 14], "192": [8, 14], "249": 8, "250": [8, 11, 17], "lm_dir": [8, 14], "search_beam": [8, 9, 14, 22], "output_beam": [8, 9, 14, 22], "min_active_st": [8, 9, 14, 22], "max_active_st": [8, 9, 14, 22], "10000": [8, 9, 14, 22], "use_double_scor": [8, 9, 14, 22], "193": 8, "213": [8, 22], "259": [8, 9], "devic": [8, 9, 11, 12, 14, 17, 19, 20, 22], "217": [8, 9, 14], "279": [8, 14], "averag": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "userwarn": [8, 11], "floor_divid": 8, "futur": [8, 11, 13], "round": [8, 11], "toward": [8, 11], "trunc": [8, 11], "function": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "NOT": [8, 9, 11, 14, 22], "floor": [8, 11], "incorrect": [8, 11], "neg": [8, 11], "valu": [8, 9, 11, 12, 14, 16], "keep": [8, 11], "behavior": [8, 11], "div": [8, 11], "b": [8, 11, 14, 19, 20], "rounding_mod": [8, 11], "actual": [8, 9, 11, 12, 14, 16], "divis": [8, 11], "trigger": 8, "intern": 8, "aten": 8, "src": [8, 16], "nativ": 8, "binaryop": 8, "cpp": 8, "450": [8, 9, 11, 12], "k": [8, 16, 19, 20], "n": [8, 9, 14, 16, 19, 20], "220": [8, 11, 12, 14], "409": 8, "190": [8, 17], "until": [8, 14], "571": [8, 14], "228": [8, 14], "transcript": [8, 9, 10, 11, 12, 14, 17, 19, 20], "recog": [8, 11, 14], "test_set": [8, 22], "572": 8, "util": [8, 14], "ins": [8, 14, 22], "del": [8, 14, 22], "sub": [8, 14, 22], "573": 8, "236": 8, "wrote": [8, 14], "detail": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "error": [8, 14], "stat": [8, 14], "err": [8, 11, 14], "299": 8, "congratul": [8, 9, 12, 14, 17, 19, 20, 22], "first": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "fun": 8, "debug": 8, "variou": [8, 13], "problem": 8, "mai": [8, 9, 11, 12, 13, 14, 16], "encount": [8, 9, 11, 12, 14, 16], 
"while": [8, 9, 11, 12, 14, 16], "tutori": [9, 11, 12, 14, 16, 17, 19, 20], "learn": [9, 11, 12, 14, 16, 17, 19, 20, 22], "singl": [9, 11, 12, 14, 16, 17, 19, 20, 22], "multipl": [9, 11, 12, 14, 17, 19, 20, 22], "1best": [9, 12, 14, 17, 19, 20], "handl": [9, 12, 14, 16, 17, 19, 20, 22], "automag": [9, 12, 14, 16, 17, 19, 20, 22], "stop": [9, 11, 12, 14, 16, 17, 19, 20, 22], "control": [9, 11, 12, 14, 16, 17, 19, 20, 22], "By": [9, 12, 14, 16, 17, 19, 20, 22], "default": [9, 11, 12, 14, 16, 17, 19, 20, 22], "execut": [9, 12, 14, 16, 17, 19, 20, 22], "mean": [9, 11, 12, 14, 16, 17, 19, 20, 22], "onli": [9, 11, 12, 13, 14, 16, 17, 19, 20, 22], "musan": [9, 12, 14, 16], "sai": [9, 11, 12, 14, 16, 17, 19, 20, 22], "thei": [9, 11, 12, 14, 16], "won": [9, 12, 14, 16], "re": [9, 12, 14, 16], "intal": [9, 12], "initi": [9, 12], "lf": [9, 11, 12, 14, 17, 19, 20, 22], "sudo": [9, 12], "apt": [9, 12], "permiss": [9, 12], "binari": [9, 11, 12, 14, 16, 22], "here": [9, 11, 12, 14, 17], "pass": [9, 11, 12, 14, 16], "commandlin": [9, 11, 12, 14, 16], "quit": [9, 11, 12, 14, 16], "often": [9, 11, 12, 14, 16], "dir": [9, 11, 12, 14, 16, 17, 19, 20, 22], "experi": [9, 11, 12, 14, 16, 22], "num": [9, 11, 12, 14, 16], "resum": [9, 11, 12, 14, 16, 17, 19, 20, 22], "state": [9, 11, 12, 14, 16], "world": [9, 11, 12, 14, 16, 17], "multi": [9, 11, 12, 14, 16], "machin": [9, 11, 12, 14, 16], "ddp": [9, 11, 12, 14, 16], "4": [9, 11, 12, 14, 16, 17, 19, 20, 22], "implement": [9, 11, 12, 14, 16], "present": [9, 11, 12, 14, 16], "later": [9, 12, 14, 16, 17, 19, 20], "max": [9, 11, 12, 14, 16], "durat": [9, 11, 12, 14, 16, 17, 19, 20, 22], "specifi": [9, 11, 12, 14, 16, 17, 19, 20, 22], "second": [9, 11, 12, 14, 16, 22], "over": [9, 11, 12, 14, 16], "utter": [9, 11, 12, 14, 16], "pad": [9, 11, 12, 14, 16], "oom": [9, 11, 12, 14, 16], "reduc": [9, 11, 12, 14, 16, 17, 19, 20, 22], "v100": [9, 11, 12, 14], "nvidia": [9, 11, 12, 14], "due": [9, 11, 12, 14, 16], "usual": [9, 11, 12, 14, 16], "larger": 
[9, 11, 12, 14, 16], "than": [9, 11, 12, 14, 16, 17, 22], "caus": [9, 11, 12, 14, 16], "smaller": [9, 11, 12, 14, 16], "increas": [9, 11, 12, 14, 16], "tune": [9, 11, 12, 14, 16], "weight": [9, 12, 14, 16], "decai": [9, 12, 14, 16], "warmup": [9, 11, 12, 14, 16], "get_param": [9, 11, 12, 14, 16, 17, 19, 20, 22], "paramet": [9, 11, 12, 14, 16, 17, 19, 20], "realli": [9, 12, 14, 16], "directli": [9, 11, 12, 14, 16], "perturb": [9, 11, 12, 14, 16], "speed": [9, 11, 12, 14, 16], "factor": [9, 11, 12, 14, 16], "each": [9, 11, 12, 14, 16], "3x150": [9, 11, 12], "hour": [9, 11, 12, 14, 16], "These": [9, 11, 12, 14, 16, 17, 19, 20, 22], "state_dict": [9, 11, 12, 14, 17, 19, 20, 22], "optim": [9, 11, 12, 14, 16, 17, 19, 20, 22], "rate": [9, 11, 12, 14, 16, 17, 19, 20, 22], "visual": [9, 11, 12, 14, 16, 17, 19, 20, 22], "logdir": [9, 11, 12, 14, 16, 17, 19, 20, 22], "labelsmooth": 9, "someth": [9, 11, 12, 14, 16, 22], "tensorflow": [9, 11, 12, 14, 16, 22], "found": [9, 11, 12, 14, 16, 22], "continu": [9, 11, 12, 14, 16, 22], "press": [9, 11, 12, 14, 16, 22], "ctrl": [9, 11, 12, 14, 16, 22], "engw8ksktzqs24zbv5dgcg": 9, "22t11": 9, "09": [9, 11, 12, 14, 16], "scan": [9, 11, 12, 14, 16, 22], "116068": 9, "scalar": [9, 11, 12, 14, 16, 22], "listen": [9, 11, 12, 16, 22], "note": [9, 11, 12, 14, 16, 17, 19, 20, 22], "url": [9, 11, 12, 14, 16, 22], "output": [9, 11, 12, 14, 16, 17, 19, 20, 22], "xxxx": [9, 11, 12, 14, 16, 17, 19, 20, 22], "text": [9, 11, 12, 14, 16, 17, 19, 20, 22], "saw": [9, 11, 12, 14, 16, 17, 19, 20, 22], "consol": [9, 11, 12, 14, 16, 17, 19, 20, 22], "typic": [9, 11, 12, 14], "avoid": [9, 11, 14], "commonli": [9, 11, 12, 14, 17, 19, 20, 22], "nbest": [9, 14], "scale": [9, 14, 17, 19, 20], "down": [9, 14, 16], "lattic": [9, 12, 14, 17, 19, 20], "score": [9, 14], "more": [9, 14, 16, 22], "uniqu": [9, 14], "pkufool": [9, 12, 17], "icefall_asr_aishell_conformer_ctc": 9, "transcrib": [9, 11, 12, 14, 16], "sound": [9, 11, 12, 14, 17, 19, 20, 22], "csukuangfj": [9, 
11, 12, 14, 16, 17, 19, 20, 22], "tree": [9, 11, 12, 14, 16, 17, 19, 20, 22], "lang_char": [9, 11], "token": [9, 11, 12, 14, 17, 19, 20, 22], "word": [9, 11, 12, 14, 17, 19, 20, 22], "test_wav": [9, 11, 12, 14, 17, 19, 20, 22], "bac009s0764w0121": [9, 11, 12], "wav": [9, 11, 12, 14, 16, 19, 20, 22], "bac009s0764w0122": [9, 11, 12], "bac009s0764w0123": [9, 11, 12], "tran": [9, 12, 14, 17, 19, 20], "graph": [9, 12, 14, 17, 19, 20], "id": [9, 12, 14, 17, 19, 20], "conveni": [9, 12, 14], "eo": [9, 12, 14], "easili": [9, 12, 14], "obtain": [9, 11, 12, 14, 17, 19, 20], "25": [9, 14, 19, 20, 22], "84": 9, "list": [9, 11, 12, 14, 17, 19, 20], "soxi": [9, 11, 12, 14, 17, 22], "sampl": [9, 11, 12, 14, 17, 22], "16000": [9, 11, 12, 14, 17, 19, 20], "precis": [9, 11, 12, 14, 17, 22], "bit": [9, 11, 12, 14, 17, 22], "04": [9, 11, 12, 14, 17, 19, 20], "67263": [9, 11, 12], "315": [9, 11, 12, 14, 17], "295": [9, 11, 12, 14], "cdda": [9, 11, 12, 14, 17, 22], "sector": [9, 11, 12, 14, 17, 22], "135k": [9, 11, 12], "256k": [9, 11, 12, 14], "encod": [9, 11, 12, 14, 16, 17, 22], "sign": [9, 11, 12, 14, 22], "integ": [9, 11, 12, 14, 22], "pcm": [9, 11, 12, 14, 22], "65840": [9, 11, 12], "625": [9, 11, 12], "132k": [9, 11, 12], "64000": [9, 11, 12], "300": [9, 11, 12, 14], "128k": [9, 11, 12, 22], "displai": [9, 11, 12, 14], "support": [9, 11, 14, 16], "three": [9, 11], "topologi": [9, 14], "07": [9, 11, 12, 14], "53": [9, 16, 17, 20], "707": [9, 14], "229": 9, "sample_r": [9, 11, 14, 22], "subsampling_factor": [9, 11, 14], "nhead": [9, 11, 14], "attention_dim": [9, 11, 14], "512": [9, 11, 14], "num_decoder_lay": [9, 14], "vgg_frontend": [9, 11, 14], "use_feat_batchnorm": [9, 14], "env_info": [9, 11, 14], "releas": [9, 11, 14], "sha1": [9, 11, 14], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 9, "date": [9, 11, 14], "sun": 9, "sep": 9, "46": [9, 14], "33cfe45": 9, "branch": [9, 11, 14], "d57a873": 9, "dirti": [9, 14], "wed": [9, 11, 14], "nov": [9, 14], "hw": 9, "kangwei": 9, 
"icefall_aishell3": 9, "k2_releas": 9, "__init__": [9, 11, 14], "tokens_fil": 9, "words_fil": [9, 14, 22], "num_path": [9, 14], "ngram_lm_scal": [9, 14], "attention_decoder_scal": [9, 14], "nbest_scal": [9, 14], "sos_id": [9, 14], "eos_id": [9, 14], "num_class": [9, 14, 22], "4336": [9, 11], "sound_fil": [9, 11, 14, 22], "708": [9, 11, 14, 22], "242": [9, 14], "131": [9, 14], "construct": [9, 11, 12, 14, 17, 19, 20, 22], "134": 9, "269": [9, 19, 20], "275": 9, "241": 9, "293": [9, 14], "704": [9, 19], "369": [9, 14], "\u751a": [9, 11], "\u81f3": [9, 11], "\u51fa": [9, 11], "\u73b0": [9, 11], "\u4ea4": [9, 11], "\u6613": [9, 11], "\u51e0": [9, 11], "\u4e4e": [9, 11], "\u505c": [9, 11], "\u6b62": 9, "\u7684": [9, 11, 12], "\u60c5": [9, 11], "\u51b5": [9, 11], "\u4e00": [9, 11], "\u4e8c": [9, 11], "\u7ebf": [9, 11, 12], "\u57ce": [9, 11], "\u5e02": [9, 11], "\u867d": [9, 11], "\u7136": [9, 11], "\u4e5f": [9, 11, 12], "\u5904": [9, 11], "\u4e8e": [9, 11], "\u8c03": [9, 11], "\u6574": [9, 11], "\u4e2d": [9, 11, 12], "\u4f46": [9, 11, 12], "\u56e0": [9, 11], "\u4e3a": [9, 11], "\u805a": [9, 11], "\u96c6": [9, 11], "\u4e86": [9, 11, 12], "\u8fc7": [9, 11], "\u591a": [9, 11], "\u516c": [9, 11], "\u5171": [9, 11], "\u8d44": [9, 11], "\u6e90": [9, 11], "371": 9, "37": [9, 11, 14, 19], "38": [9, 11, 14, 19], "683": 9, "47": [9, 14], "651": [9, 22], "654": 9, "659": 9, "752": 9, "321": 9, "887": 9, "340": 9, "370": 9, "\u751a\u81f3": [9, 12], "\u51fa\u73b0": [9, 12], "\u4ea4\u6613": [9, 12], "\u51e0\u4e4e": [9, 12], "\u505c\u6b62": 9, "\u60c5\u51b5": [9, 12], "\u4e00\u4e8c": [9, 12], "\u57ce\u5e02": [9, 12], "\u867d\u7136": [9, 12], "\u5904\u4e8e": [9, 12], "\u8c03\u6574": [9, 12], "\u56e0\u4e3a": [9, 12], "\u805a\u96c6": [9, 12], "\u8fc7\u591a": [9, 12], "\u516c\u5171": [9, 12], "\u8d44\u6e90": [9, 12], "372": 9, "recor": [9, 14], "highest": [9, 14], "05": [9, 11, 12, 14, 20], "965": 9, "966": 9, "821": 9, "822": 9, "826": 9, "916": 9, "115": [9, 14], "345": 9, "888": 9, 
"889": 9, "limit": [9, 11, 14], "memori": [9, 11, 14], "upgrad": [9, 14], "pro": [9, 14], "finish": [9, 11, 12, 14, 17, 19, 20, 22], "deploi": [9, 14], "At": [9, 14], "doe": [9, 11, 14, 22], "stream": [9, 14, 16, 19, 20], "home": [9, 14], "checkout": [9, 14], "v2": [9, 14], "cmake": [9, 14], "dcmake_build_typ": [9, 14], "j": [9, 14], "hlg_decod": [9, 14], "four": [9, 14], "ln": [9, 14, 16], "messag": [9, 14, 16], "nn_model": [9, 14], "use_gpu": [9, 14], "word_tabl": [9, 14], "wave": [9, 14], "caution": [9, 14], "relat": [9, 11, 14, 17, 19, 20, 22], "forward": [9, 14], "cpu_jit": [9, 14], "cu": [9, 14], "int": [9, 14], "char": [9, 14], "124": [9, 14], "98": 9, "142": [9, 12, 14], "150": [9, 14], "693": [9, 19], "165": [9, 14], "nnet_output": [9, 14], "182": [9, 17], "180": [9, 14], "489": 9, "45": [9, 11, 14], "216": [9, 14, 19, 20], "mandarin": 10, "corpu": 10, "beij": 10, "shell": 10, "technologi": 10, "ltd": 10, "400": 10, "peopl": 10, "accent": 10, "area": 10, "china": 10, "invit": 10, "particip": 10, "conduct": 10, "quiet": 10, "indoor": 10, "high": 10, "fidel": 10, "microphon": 10, "downsampl": 10, "16khz": 10, "manual": 10, "accuraci": 10, "through": 10, "profession": 10, "annot": 10, "strict": 10, "inspect": 10, "free": [10, 16], "academ": 10, "hope": [10, 14, 17], "moder": 10, "amount": 10, "research": 10, "field": 10, "openslr": 10, "ctc": [10, 13, 15, 18, 21], "stateless": [10, 13, 16], "transduc": [10, 13, 15], "instead": 11, "rnn": [11, 16], "As": [11, 14], "head": 11, "dim": 11, "layer": [11, 16], "feedforward": 11, "2048": 11, "embed": [11, 16], "conv1d": [11, 16], "kernel": 11, "left": 11, "context": [11, 16], "joiner": [11, 16], "nn": [11, 16], "tanh": 11, "linear": 11, "borrow": 11, "ieeexplor": 11, "ieee": 11, "stamp": 11, "jsp": 11, "arnumb": 11, "9054419": 11, "predict": 11, "modif": 11, "right": 11, "charact": 11, "unit": 11, "whose": [11, 14, 17], "vocabulari": 11, "87939824": 11, "88": 11, "optimized_transduc": 11, "extra": 11, "technqiu": 
11, "propos": 11, "improv": 11, "end": [11, 16, 22], "furthermor": 11, "maximum": 11, "emit": 11, "per": 11, "frame": 11, "simplifi": 11, "significantli": 11, "degrad": 11, "perform": 11, "exactli": 11, "benchmark": 11, "unprun": 11, "advantag": 11, "minim": 11, "pruned_transducer_stateless": 11, "prune": [11, 16], "other": [11, 13, 14, 17, 19, 20, 22], "altern": 11, "though": 11, "transducer_stateless_modifi": 11, "option": [11, 17, 19, 20, 22], "pr": 11, "gb": 11, "ram": 11, "small": [11, 19, 20, 22], "tri": 11, "prob": [11, 16], "appli": 11, "configur": [11, 17, 19, 20, 22], "c": [11, 12, 16, 22], "lagz6hrcqxoigbfd5e0y3q": 11, "2022": [11, 16], "03t14": 11, "8477": 11, "greedy_search": [11, 16], "sym": 11, "beam_search": 11, "decoding_method": 11, "beam_4": 11, "02": [11, 14, 16, 20], "28": [11, 14, 17], "ensur": 11, "give": 11, "poor": 11, "531": [11, 12], "994": [11, 14], "176": [11, 14], "027": 11, "encoder_out_dim": 11, "dim_feedforward": 11, "num_encoder_lay": 11, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 11, "thu": [11, 14, 17], "feb": 11, "miss": [11, 14], "50d2281": 11, "mar": 11, "hostnam": 11, "de": 11, "74279": 11, "0815224919": 11, "75d558775b": 11, "mmnv8": 11, "ip": 11, "177": [11, 12, 14], "72": [11, 14], "context_s": 11, "max_sym_per_fram": 11, "blank_id": 11, "vocab_s": 11, "878": [11, 20], "257": [11, 19, 20], "880": 11, "267": [11, 19, 20], "891": 11, "273": 11, "__floordiv__": 11, "length": 11, "x_len": 11, "163": [11, 14], "320": 11, "\u6ede": 11, "322": 11, "285": [11, 14], "759": 11, "760": 11, "919": 11, "922": 11, "046": 11, "047": 11, "319": [11, 14], "214": [11, 14], "215": [11, 14, 17], "402": 11, "topk_hyp_index": 11, "topk_index": 11, "logit": 11, "583": [11, 20], "2000": 12, "lji9mwuorlow3jkdhxwk8a": 12, "13t11": 12, "4454": 12, "icefall_asr_aishell_tdnn_lstm_ctc": 12, "858": [12, 14], "389": [12, 14], "154": 12, "161": [12, 14], "536": 12, "171": [12, 14, 19, 20], "539": 12, "917": 12, "207": [12, 14], "129": 12, "\u505c\u6ede": 
12, "222": [12, 14], "task": 13, "well": [13, 22], "ligru": [13, 18], "full": [14, 16], "libri": [14, 16], "960": [14, 16], "subset": [14, 16], "3x960": [14, 16], "2880": [14, 16], "lzgnetjwrxc3yghnmd4kpw": 14, "24t16": 14, "43": 14, "4540": 14, "sentenc": 14, "piec": 14, "And": [14, 16], "neither": 14, "nor": 14, "vocab": 14, "work": 14, "5000": 14, "lang_bpe_500": [14, 16], "44": [14, 19, 20], "033": 14, "538": 14, "full_libri": 14, "406": 14, "464": 14, "548": 14, "776": 14, "652": [14, 22], "109226120": 14, "714": [14, 19], "473": 14, "944": 14, "1328": 14, "54": [14, 17, 19, 20], "443": [14, 17], "2563": 14, "56": [14, 19], "494": 14, "592": 14, "331": [14, 17], "1715": 14, "52576": 14, "1424": 14, "807": 14, "506": 14, "808": [14, 19], "522": 14, "362": 14, "203": 14, "565": 14, "1477": 14, "106": 14, "2922": 14, "208": 14, "4295": 14, "52343": 14, "396": 14, "3584": 14, "433": 14, "680": [14, 19], "jit": 14, "bpe": [14, 16], "_pickl": 14, "unpicklingerror": 14, "kei": 14, "hlg_modifi": 14, "g_4_gram": [14, 17, 19, 20], "1089": [14, 17], "134686": [14, 17], "0001": [14, 17], "1221": [14, 17], "135766": [14, 17], "0002": [14, 17], "reproduc": 14, "our": 14, "106000": [14, 17], "496": [14, 17], "875": [14, 17], "212k": 14, "267440": [14, 17], "1253": [14, 17], "535k": 14, "83": [14, 17], "77200": [14, 17], "361": [14, 17], "154k": 14, "554": 14, "260": 14, "bpe_model": 14, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 14, "tue": 14, "oct": 14, "22": [14, 19, 20, 22], "8d93169": 14, "266": [14, 17], "268": [14, 17], "600": [14, 16], "601": 14, "758": 14, "025": 14, "204": 14, "425": 14, "earli": [14, 17], "nightfal": [14, 17], "THE": [14, 17], "yellow": [14, 17], "lamp": [14, 17], "light": [14, 17], "AND": [14, 17], "THERE": [14, 17], "squalid": [14, 17], "quarter": [14, 17], "OF": [14, 17], "broffel": 14, "god": [14, 17], "AS": [14, 17], "direct": [14, 17], "consequ": [14, 17], "sin": [14, 17], "man": [14, 17], "punish": [14, 17], "had": [14, 17], "her": [14, 
17], "love": [14, 17], "child": [14, 17], "ON": [14, 17], "THAT": [14, 17], "dishonor": [14, 17], "osom": 14, "TO": [14, 17], "parent": [14, 17], "forev": [14, 17], "WITH": [14, 17], "race": [14, 17], "descent": [14, 17], "mortal": [14, 17], "BE": [14, 17], "bless": [14, 17], "soul": [14, 17], "IN": [14, 17], "heaven": [14, 17], "yet": [14, 17], "THESE": [14, 17], "thought": [14, 17], "affect": [14, 17], "hester": [14, 17], "prynn": [14, 17], "less": [14, 17, 22], "apprehens": [14, 17], "427": 14, "723": 14, "775": 14, "881": 14, "951": 14, "352": 14, "234": 14, "384": 14, "brothel": [14, 17], "bosom": [14, 17], "whole": [14, 17, 19, 20], "ngram": [14, 17, 19, 20], "Its": 14, "857": 14, "979": 14, "980": 14, "055": 14, "117": 14, "051": 14, "363": 14, "959": [14, 20], "546": 14, "598": 14, "599": [14, 17], "833": 14, "834": 14, "915": 14, "076": 14, "110": 14, "329": 14, "397": 14, "999": 14, "concaten": 14, "bucket": 14, "sampler": 14, "1000": 14, "ctc_decod": 14, "ngram_lm_rescor": 14, "attention_rescor": 14, "kind": [14, 16], "316": 14, "118": 14, "58": 14, "221": 14, "125": [14, 22], "136": 14, "144": 14, "159": [14, 22], "543": 14, "174": 14, "topo": 14, "547": 14, "729": 14, "111": 14, "702": 14, "703": 14, "545": 14, "122": 14, "280": 14, "135": [14, 22], "153": [14, 22], "945": 14, "475": 14, "191": [14, 19, 20], "398": 14, "199": [14, 17], "515": 14, "205": 14, "w": [14, 19, 20], "deseri": 14, "441": 14, "fsaclass": 14, "loadfsa": 14, "const": 14, "string": 14, "c10": 14, "ignor": 14, "non": 14, "attribut": 14, "dummi": 14, "589": 14, "attention_scal": 14, "656": 14, "162": 14, "169": [14, 19, 20], "188": 14, "624": 14, "519": [14, 20], "632": 14, "645": [14, 22], "243": 14, "970": 14, "303": 14, "179": 14, "scroll": 16, "scratch": 16, "paper": 16, "arxiv": 16, "ab": 16, "2206": 16, "13236": 16, "predictor": 16, "joint": 16, "contrari": 16, "convent": 16, "That": 16, "recurr": 16, "sinc": [16, 22], "suitabl": 16, "onlin": 16, "lstm_transducer_stateless": 
16, "lstm_transducer_stateless2": 16, "architectur": 16, "lower": 16, "skip": 16, "prepare_giga_speech": 16, "case": 16, "giga": 16, "436000": 16, "438000": 16, "cj2vtpiwqhkn9q1tx6ptpg": 16, "20t15": 16, "4468": 16, "210171": 16, "access": 16, "6008": 16, "localhost": 16, "expos": 16, "proxi": 16, "bind_al": 16, "fp16": 16, "worker": 16, "iter": 16, "suggest": 16, "both": 16, "produc": 16, "lowest": 16, "fast_beam_search": 16, "hidden": 16, "1024": 16, "beam": 16, "474000": 16, "includ": 16, "But": 16, "interest": 16, "468000": 16, "smallest": 16, "9999": 16, "encoder_jit_trac": 16, "decoder_jit_trac": 16, "joiner_jit_trac": 16, "jit_pretrain": 16, "pnnx": 16, "submodul": 16, "updat": 16, "recurs": 16, "init": 16, "bdist_wheel": 16, "lh": 16, "dist": 16, "j4": 16, "pwd": 16, "third": 16, "torchscript": 16, "param": 16, "abl": 16, "static": 16, "librari": 16, "raspberri": 16, "pi": 16, "stateless2": 16, "zengwei": 16, "streaming_asr": 16, "icefall_asr_librispeech_tdnn": 17, "lstm_ctc": 17, "flac": 17, "116k": 17, "140k": 17, "343k": 17, "164k": 17, "105k": 17, "174k": 17, "usag": [17, 19, 20, 22], "pretraind": 17, "168": 17, "170": 17, "581": 17, "584": [17, 20], "209": 17, "791": 17, "245": 17, "099": 17, "methond": [17, 19, 20], "725": 17, "403": 17, "631": 17, "010": 17, "tdnn_ligru_ctc": 19, "enough": [19, 20, 22], "luomingshuang": [19, 20], "icefall_asr_timit_tdnn_ligru_ctc": 19, "pretrained_average_9_25": 19, "fdhc0_si1559": [19, 20], "felc0_si756": [19, 20], "fmgd0_si1564": [19, 20], "ffprobe": [19, 20], "show_format": [19, 20], "nistspher": [19, 20], "database_id": [19, 20], "database_vers": [19, 20], "utterance_id": [19, 20], "dhc0_si1559": [19, 20], "sample_min": [19, 20], "4176": [19, 20], "sample_max": [19, 20], "5984": [19, 20], "bitrat": [19, 20], "258": [19, 20], "audio": [19, 20], "pcm_s16le": [19, 20], "hz": [19, 20], "s16": [19, 20], "256": [19, 20], "elc0_si756": [19, 20], "1546": [19, 20], "1989": [19, 20], "mgd0_si1564": [19, 20], "7626": [19, 
20], "10573": [19, 20], "660": 19, "183": [19, 20], "695": 19, "697": 19, "210": [19, 20], "829": 19, "sil": [19, 20], "dh": [19, 20], "ih": [19, 20], "uw": [19, 20], "ah": [19, 20], "ii": [19, 20], "z": [19, 20], "aa": [19, 20], "ei": [19, 20], "dx": [19, 20], "uh": [19, 20], "ng": [19, 20], "th": [19, 20], "eh": [19, 20], "jh": [19, 20], "er": [19, 20], "ai": [19, 20], "hh": [19, 20], "aw": 19, "ae": [19, 20], "705": 19, "715": 19, "720": 19, "251": [19, 20], "348": 19, "ch": 19, "icefall_asr_timit_tdnn_lstm_ctc": 20, "pretrained_average_16_25": 20, "816": 20, "827": 20, "387": 20, "unk": 20, "739": 20, "971": 20, "977": 20, "978": 20, "981": 20, "ow": 20, "ykubhb5wrmosxykid1z9eg": 22, "23t23": 22, "icefall_asr_yesno_tdnn": 22, "l_disambig": 22, "lexicon_disambig": 22, "arpa": 22, "0_0_0_1_0_0_0_1": 22, "0_0_1_0_0_0_1_0": 22, "0_0_1_0_0_1_1_1": 22, "0_0_1_0_1_0_0_1": 22, "0_0_1_1_0_0_0_1": 22, "0_0_1_1_0_1_1_0": 22, "0_0_1_1_1_0_0_0": 22, "0_0_1_1_1_1_0_0": 22, "0_1_0_0_0_1_0_0": 22, "0_1_0_0_1_0_1_0": 22, "0_1_0_1_0_0_0_0": 22, "0_1_0_1_1_1_0_0": 22, "0_1_1_0_0_1_1_1": 22, "0_1_1_1_0_0_1_0": 22, "0_1_1_1_1_0_1_0": 22, "1_0_0_0_0_0_0_0": 22, "1_0_0_0_0_0_1_1": 22, "1_0_0_1_0_1_1_1": 22, "1_0_1_1_0_1_1_1": 22, "1_0_1_1_1_1_0_1": 22, "1_1_0_0_0_1_1_1": 22, "1_1_0_0_1_0_1_1": 22, "1_1_0_1_0_1_0_0": 22, "1_1_0_1_1_0_0_1": 22, "1_1_0_1_1_1_1_0": 22, "1_1_1_0_0_1_0_1": 22, "1_1_1_0_1_0_1_0": 22, "1_1_1_1_0_0_1_0": 22, "1_1_1_1_1_0_0_0": 22, "1_1_1_1_1_1_1_1": 22, "54080": 22, "507": 22, "108k": 22, "No": 22, "ye": 22, "hebrew": 22, "NO": 22, "621": 22, "119": 22, "127": 22, "650": 22, "139": 22, "143": 22, "198": 22, "181": 22, "186": 22, "187": 22, "287": 22, "correctli": 22, "simplest": 22}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": 2, "creat": [2, 8], "recip": [2, 13], "data": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "prepar": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 
22], "train": [2, 5, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "decod": [2, 8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "pre": [2, 5, 9, 11, 12, 14, 16, 17, 19, 20, 22], "model": [2, 5, 9, 11, 12, 14, 16, 17, 19, 20, 22], "huggingfac": [4, 6], "space": 6, "youtub": [6, 8], "video": [6, 8], "icefal": [7, 8], "content": [7, 13], "instal": [8, 9, 11, 12, 14, 17, 19, 20], "0": 8, "pytorch": 8, "torchaudio": 8, "1": [8, 9, 11, 12, 14], "k2": 8, "2": [8, 9, 11, 12, 14], "lhots": 8, "3": [8, 9, 11, 14], "download": [8, 9, 11, 12, 14, 16, 17, 19, 20, 22], "exampl": [8, 9, 11, 12, 14, 16], "virtual": 8, "environ": 8, "activ": 8, "your": 8, "4": 8, "5": 8, "test": 8, "conform": [9, 14], "ctc": [9, 12, 14, 17, 19, 20, 22], "configur": [9, 12, 14, 16], "option": [9, 12, 14, 16], "log": [9, 11, 12, 14, 16], "usag": [9, 11, 12, 14, 16], "case": [9, 11, 12, 14], "kaldifeat": [9, 11, 12, 14, 17, 19, 20, 22], "hlg": [9, 12, 14], "attent": [9, 14], "rescor": [9, 14], "colab": [9, 11, 12, 14, 17, 19, 20, 22], "notebook": [9, 11, 12, 14, 17, 19, 20, 22], "deploy": [9, 14], "c": [9, 14], "aishel": 10, "stateless": 11, "transduc": [11, 16], "The": 11, "loss": 11, "todo": 11, "greedi": 11, "search": 11, "beam": 11, "modifi": 11, "tdnn": [12, 17, 19, 20, 22], "lstm": [12, 17, 20], "tabl": 13, "lm": 14, "comput": 14, "wer": 14, "librispeech": 15, "which": 16, "us": 16, "export": 16, "state_dict": 16, "torch": 16, "jit": 16, "trace": 16, "ncnn": 16, "pretrain": 16, "infer": [17, 19, 20, 22], "timit": 18, "ligru": 19, "yesno": 21}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 56}})
\ No newline at end of file