
VITS


This tutorial shows you how to train a VITS model with the VCTK dataset.


Note


TTS-related recipes require the packages in requirements-tts.txt.
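
You can install them with pip, e.g. (assuming you run the command from the directory that contains that file):

$ pip install -r requirements-tts.txt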


Data preparation

$ cd egs/vctk/TTS
$ ./prepare.sh

To run stage 1 to stage 6, use

$ ./prepare.sh --stage 1 --stop_stage 6
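
Similarly, you can run a single stage by giving --stage and --stop_stage the same value, e.g. for stage 2 only:

$ ./prepare.sh --stage 2 --stop_stage 2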

Training

$ export CUDA_VISIBLE_DEVICES="0,1,2,3"
$ ./vits/train.py \
    --world-size 4 \
    --num-epochs 1000 \
    --start-epoch 1 \
    --use-fp16 1 \
    --exp-dir vits/exp \
    --tokens data/tokens.txt \
    --max-duration 350

Note


You can adjust the hyper-parameters to control the size of the VITS model and the training configurations. For more details, please run ./vits/train.py --help.


Note


The training can take a long time (usually a couple of days).
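If a long run is interrupted, you can resume from a saved checkpoint by raising --start-epoch. A sketch reusing only the flags shown above, assuming vits/exp holds the checkpoint saved after epoch 500:

$ ./vits/train.py \
    --world-size 4 \
    --num-epochs 1000 \
    --start-epoch 501 \
    --use-fp16 1 \
    --exp-dir vits/exp \
    --tokens data/tokens.txt \
    --max-duration 350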


Training logs, checkpoints and tensorboard logs are saved in vits/exp.
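
To monitor a run, you can point tensorboard at the experiment directory; a minimal sketch, assuming the logs live in a tensorboard subdirectory (the usual icefall layout):

$ tensorboard --logdir vits/exp/tensorboard --port 6006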


Inference


The inference part uses checkpoints saved by the training part, so you have to run the training part first. It will save the ground-truth and generated wavs to the directory vits/exp/infer/epoch-*/wav, e.g., vits/exp/infer/epoch-1000/wav.

$ export CUDA_VISIBLE_DEVICES="0"
$ ./vits/infer.py \
    --epoch 1000 \
    --exp-dir vits/exp \
    --tokens data/tokens.txt \
    --max-duration 500
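
Once inference finishes, you can inspect the outputs directly, e.g.:

$ ls vits/exp/infer/epoch-1000/wav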

Note


For more details, please run ./vits/infer.py --help.


Export models


Currently we only support exporting to ONNX. It will generate two files in the given exp-dir: vits-epoch-*.onnx and vits-epoch-*.int8.onnx.

$ ./vits/export-onnx.py \
    --epoch 1000 \
    --exp-dir vits/exp \
    --tokens data/tokens.txt

You can test the exported ONNX model with:

$ ./vits/test_onnx.py \
    --model-filename vits/exp/vits-epoch-1000.onnx \
    --tokens data/tokens.txt

Download pretrained models


If you don’t want to train from scratch, you can download the pretrained models by visiting the following link:
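
Once you have the link, a common way to fetch a Hugging Face model repository is with git-lfs; a minimal sketch, where <repo-url> is a placeholder for the actual link:

$ git lfs install
$ git clone <repo-url>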
