mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
496 lines
53 KiB
HTML
496 lines
53 KiB
HTML
|
||
|
||
<!DOCTYPE html>
|
||
<html class="writer-html5" lang="en">
|
||
<head>
|
||
<meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<title>TDNN-LSTM-CTC — icefall 0.1 documentation</title>
|
||
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=03e43079" />
|
||
<link rel="stylesheet" type="text/css" href="../../../_static/css/theme.css?v=e59714d7" />
|
||
|
||
|
||
<script src="../../../_static/jquery.js?v=5d32c60e"></script>
|
||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js?v=e031e9a9"></script>
|
||
<script src="../../../_static/doctools.js?v=888ff710"></script>
|
||
<script src="../../../_static/sphinx_highlight.js?v=4825356b"></script>
|
||
<script src="../../../_static/js/theme.js"></script>
|
||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||
<link rel="search" title="Search" href="../../../search.html" />
|
||
<link rel="next" title="YesNo" href="../yesno/index.html" />
|
||
<link rel="prev" title="TDNN-LiGRU-CTC" href="tdnn_ligru_ctc.html" />
|
||
</head>
|
||
|
||
<body class="wy-body-for-nav">
|
||
<div class="wy-grid-for-nav">
|
||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||
<div class="wy-side-scroll">
|
||
<div class="wy-side-nav-search" >
|
||
|
||
|
||
|
||
<a href="../../../index.html" class="icon icon-home">
|
||
icefall
|
||
</a>
|
||
<div role="search">
|
||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../for-dummies/index.html">Icefall for dummies tutorial</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../installation/index.html">Installation</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../docker/index.html">Docker</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../faqs.html">Frequently Asked Questions (FAQs)</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../model-export/index.html">Model export</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../fst-based-forced-alignment/index.html">FST-based forced alignment</a></li>
|
||
</ul>
|
||
<ul class="current">
|
||
<li class="toctree-l1 current"><a class="reference internal" href="../../index.html">Recipes</a><ul class="current">
|
||
<li class="toctree-l2 current"><a class="reference internal" href="../index.html">Non Streaming ASR</a><ul class="current">
|
||
<li class="toctree-l3"><a class="reference internal" href="../aishell/index.html">aishell</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../librispeech/index.html">LibriSpeech</a></li>
|
||
<li class="toctree-l3 current"><a class="reference internal" href="index.html">TIMIT</a><ul class="current">
|
||
<li class="toctree-l4"><a class="reference internal" href="tdnn_ligru_ctc.html">TDNN-LiGRU-CTC</a></li>
|
||
<li class="toctree-l4 current"><a class="current reference internal" href="#">TDNN-LSTM-CTC</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../yesno/index.html">YesNo</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../Streaming-ASR/index.html">Streaming ASR</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../RNN-LM/index.html">RNN-LM</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../TTS/index.html">TTS</a></li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../Finetune/index.html">Fine-tune a pre-trained model</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/index.html">Contributing</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../huggingface/index.html">Huggingface</a></li>
|
||
</ul>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../decoding-with-langugage-models/index.html">Decoding with language models</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</nav>
|
||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||
<a href="../../../index.html">icefall</a>
|
||
</nav>
|
||
|
||
<div class="wy-nav-content">
|
||
<div class="rst-content">
|
||
<div role="navigation" aria-label="Page navigation">
|
||
<ul class="wy-breadcrumbs">
|
||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||
<li class="breadcrumb-item"><a href="../../index.html">Recipes</a></li>
|
||
<li class="breadcrumb-item"><a href="../index.html">Non Streaming ASR</a></li>
|
||
<li class="breadcrumb-item"><a href="index.html">TIMIT</a></li>
|
||
<li class="breadcrumb-item active">TDNN-LSTM-CTC</li>
|
||
<li class="wy-breadcrumbs-aside">
|
||
<a href="https://github.com/k2-fsa/icefall/blob/master/docs/source/recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst" class="fa fa-github"> Edit on GitHub</a>
|
||
</li>
|
||
</ul>
|
||
<hr/>
|
||
</div>
|
||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||
<div itemprop="articleBody">
|
||
|
||
<section id="tdnn-lstm-ctc">
|
||
<h1>TDNN-LSTM-CTC<a class="headerlink" href="#tdnn-lstm-ctc" title="Permalink to this heading"></a></h1>
|
||
<p>This tutorial shows you how to run a TDNN-LSTM-CTC model with the <a class="reference external" href="https://data.deepai.org/timit.zip">TIMIT</a> dataset.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>We assume you have read the page <a class="reference internal" href="../../../installation/index.html#install-icefall"><span class="std std-ref">Installation</span></a> and have set up
|
||
the environment for <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||
</div>
|
||
<section id="data-preparation">
|
||
<h2>Data preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h2>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/timit/ASR
|
||
$<span class="w"> </span>./prepare.sh
|
||
</pre></div>
|
||
</div>
|
||
<p>The script <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> handles the data preparation for you, <strong>automagically</strong>.
|
||
All you need to do is to run it.</p>
|
||
<p>The data preparation contains several stages. You can use the following two
|
||
options:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--stage</span></code></p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--stop-stage</span></code></p></li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>to control which stage(s) should be run. By default, all stages are executed.</p>
|
||
<p>For example,</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/timit/ASR
|
||
$<span class="w"> </span>./prepare.sh<span class="w"> </span>--stage<span class="w"> </span><span class="m">0</span><span class="w"> </span>--stop-stage<span class="w"> </span><span class="m">0</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>means to run only stage 0.</p>
|
||
<p>To run stage 2 to stage 5, use:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./prepare.sh<span class="w"> </span>--stage<span class="w"> </span><span class="m">2</span><span class="w"> </span>--stop-stage<span class="w"> </span><span class="m">5</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="training">
|
||
<h2>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h2>
|
||
<p>We now describe the training of the TDNN-LSTM-CTC model, contained in
|
||
the <a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_lstm_ctc">tdnn_lstm_ctc</a>
|
||
folder.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>TIMIT is a very small dataset. So one GPU for training is enough.</p>
|
||
</div>
|
||
<p>The command to run the training part is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/timit/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0"</span>
|
||
$<span class="w"> </span>./tdnn_lstm_ctc/train.py
|
||
</pre></div>
|
||
</div>
|
||
<p>By default, it will run <code class="docutils literal notranslate"><span class="pre">25</span></code> epochs. Training logs and checkpoints are saved
|
||
in <code class="docutils literal notranslate"><span class="pre">tdnn_lstm_ctc/exp</span></code>.</p>
|
||
<p>In <code class="docutils literal notranslate"><span class="pre">tdnn_lstm_ctc/exp</span></code>, you will find the following files:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …, <code class="docutils literal notranslate"><span class="pre">epoch-29.pt</span></code></p>
|
||
<p>These are checkpoint files, containing model <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> and optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>.
|
||
To resume training from some checkpoint, say <code class="docutils literal notranslate"><span class="pre">epoch-10.pt</span></code>, you can use:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./tdnn_lstm_ctc/train.py<span class="w"> </span>--start-epoch<span class="w"> </span><span class="m">11</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">tensorboard/</span></code></p>
|
||
<p>This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||
rate, etc., are recorded in these logs. You can visualize them by running:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>tdnn_lstm_ctc/exp/tensorboard
|
||
$<span class="w"> </span>tensorboard<span class="w"> </span>dev<span class="w"> </span>upload<span class="w"> </span>--logdir<span class="w"> </span>.<span class="w"> </span>--description<span class="w"> </span><span class="s2">"TDNN LSTM training for timit with icefall"</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">log/log-train-xxxx</span></code></p>
|
||
<p>It is the detailed training log in text format, same as the one
|
||
you saw printed to the console during training.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>To see available training options, you can use:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./tdnn_lstm_ctc/train.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>Other training options, e.g., learning rate, results dir, etc., are
|
||
pre-configured in the function <code class="docutils literal notranslate"><span class="pre">get_params()</span></code>
|
||
in <a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/timit/ASR/tdnn_lstm_ctc/train.py">tdnn_lstm_ctc/train.py</a>.
|
||
Normally, you don’t need to change them. You can change them by modifying the code, if
|
||
you want.</p>
|
||
</section>
|
||
<section id="decoding">
|
||
<h2>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h2>
|
||
<p>The decoding part uses checkpoints saved by the training part, so you have
|
||
to run the training part first.</p>
|
||
<p>The command for decoding is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0"</span>
|
||
$<span class="w"> </span>./tdnn_lstm_ctc/decode.py
|
||
</pre></div>
|
||
</div>
|
||
<p>You will see the WER in the output log.</p>
|
||
<p>Decoded results are saved in <code class="docutils literal notranslate"><span class="pre">tdnn_lstm_ctc/exp</span></code>.</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./tdnn_lstm_ctc/decode.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>shows you the available decoding options.</p>
|
||
<p>Some commonly used options are:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--epoch</span></code></p>
|
||
<p>You can select which checkpoint to use for decoding.
|
||
For instance, <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/decode.py</span> <span class="pre">--epoch</span> <span class="pre">10</span></code> means to use
|
||
<code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/exp/epoch-10.pt</span></code> for decoding.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--avg</span></code></p>
|
||
<p>It’s related to model averaging. It specifies the number of checkpoints
|
||
to be averaged. The averaged model is used for decoding.
|
||
For example, the following command:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./tdnn_lstm_ctc/decode.py<span class="w"> </span>--epoch<span class="w"> </span><span class="m">25</span><span class="w"> </span>--avg<span class="w"> </span><span class="m">10</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>uses the average of <code class="docutils literal notranslate"><span class="pre">epoch-16.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-17.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-18.pt</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">epoch-19.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-20.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-21.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-22.pt</span></code>,
|
||
<code class="docutils literal notranslate"><span class="pre">epoch-23.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-24.pt</span></code> and <code class="docutils literal notranslate"><span class="pre">epoch-25.pt</span></code>
|
||
for decoding.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--export</span></code></p>
|
||
<p>If it is <code class="docutils literal notranslate"><span class="pre">True</span></code>, i.e., <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/decode.py</span> <span class="pre">--export</span> <span class="pre">1</span></code>, the code
|
||
will save the averaged model to <code class="docutils literal notranslate"><span class="pre">tdnn_lstm_ctc/exp/pretrained.pt</span></code>.
|
||
See <a class="reference internal" href="#tdnn-lstm-ctc-use-a-pre-trained-model"><span class="std std-ref">Pre-trained Model</span></a> for how to use it.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="pre-trained-model">
|
||
<span id="tdnn-lstm-ctc-use-a-pre-trained-model"></span><h2>Pre-trained Model<a class="headerlink" href="#pre-trained-model" title="Permalink to this heading"></a></h2>
|
||
<p>We have uploaded the pre-trained model to
|
||
<a class="reference external" href="https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_lstm_ctc">https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_lstm_ctc</a>.</p>
|
||
<p>The following shows you how to use the pre-trained model.</p>
|
||
<section id="install-kaldifeat">
|
||
<h3>Install kaldifeat<a class="headerlink" href="#install-kaldifeat" title="Permalink to this heading"></a></h3>
|
||
<p><a class="reference external" href="https://github.com/csukuangfj/kaldifeat">kaldifeat</a> is used to
|
||
extract features for a single sound file or multiple sound files
|
||
at the same time.</p>
|
||
<p>Please refer to <a class="reference external" href="https://github.com/csukuangfj/kaldifeat">https://github.com/csukuangfj/kaldifeat</a> for installation.</p>
|
||
</section>
|
||
<section id="download-the-pre-trained-model">
|
||
<h3>Download the pre-trained model<a class="headerlink" href="#download-the-pre-trained-model" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/timit/ASR
|
||
$<span class="w"> </span>mkdir<span class="w"> </span>tmp-lstm
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>tmp-lstm
|
||
$<span class="w"> </span>git<span class="w"> </span>lfs<span class="w"> </span>install
|
||
$<span class="w"> </span>git<span class="w"> </span>clone<span class="w"> </span>https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_lstm_ctc
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>You have to use <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">lfs</span></code> to download the pre-trained model.</p>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>In order to use this pre-trained model, your k2 version has to be v1.7 or later.</p>
|
||
</div>
|
||
<p>After downloading, you will have the following files:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/timit/ASR
|
||
$<span class="w"> </span>tree<span class="w"> </span>tmp-lstm
|
||
</pre></div>
|
||
</div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>tmp-lstm/
|
||
<span class="sb">`</span>--<span class="w"> </span>icefall_asr_timit_tdnn_lstm_ctc
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>README.md
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>data
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>lang_phone
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>HLG.pt
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>tokens.txt
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>words.txt
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>lm
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>G_4_gram.pt
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>exp
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>pretrained_average_16_25.pt
|
||
<span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>test_wavs
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>FDHC0_SI1559.WAV
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>FELC0_SI756.WAV
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>FMGD0_SI1564.WAV
|
||
<span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>trans.txt
|
||
|
||
<span class="m">6</span><span class="w"> </span>directories,<span class="w"> </span><span class="m">10</span><span class="w"> </span>files
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>File descriptions</strong>:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_phone/HLG.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It is the decoding graph.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_phone/tokens.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains tokens and their IDs.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_phone/words.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains words and their IDs.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lm/G_4_gram.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It is a 4-gram LM, useful for LM rescoring.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp/pretrained_average_16_25.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains pre-trained model parameters, obtained by averaging
|
||
checkpoints from <code class="docutils literal notranslate"><span class="pre">epoch-16.pt</span></code> to <code class="docutils literal notranslate"><span class="pre">epoch-25.pt</span></code>.
|
||
Note: We have removed optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> to reduce file size.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">test_waves/*.WAV</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains some test sound files from the TIMIT <code class="docutils literal notranslate"><span class="pre">TEST</span></code> dataset.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">test_waves/trans.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains the reference transcripts for the sound files in <code class="docutils literal notranslate"><span class="pre">test_waves/</span></code>.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>The information of the test sound files is listed below:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>ffprobe<span class="w"> </span>-show_format<span class="w"> </span>tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV
|
||
|
||
Input<span class="w"> </span><span class="c1">#0, nistsphere, from 'tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV':</span>
|
||
Metadata:
|
||
<span class="w"> </span>database_id<span class="w"> </span>:<span class="w"> </span>TIMIT
|
||
<span class="w"> </span>database_version:<span class="w"> </span><span class="m">1</span>.0
|
||
<span class="w"> </span>utterance_id<span class="w"> </span>:<span class="w"> </span>dhc0_si1559
|
||
<span class="w"> </span>sample_min<span class="w"> </span>:<span class="w"> </span>-4176
|
||
<span class="w"> </span>sample_max<span class="w"> </span>:<span class="w"> </span><span class="m">5984</span>
|
||
Duration:<span class="w"> </span><span class="m">00</span>:00:03.40,<span class="w"> </span>bitrate:<span class="w"> </span><span class="m">258</span><span class="w"> </span>kb/s
|
||
<span class="w"> </span>Stream<span class="w"> </span><span class="c1">#0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s</span>
|
||
|
||
$<span class="w"> </span>ffprobe<span class="w"> </span>-show_format<span class="w"> </span>tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV
|
||
|
||
Input<span class="w"> </span><span class="c1">#0, nistsphere, from 'tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV':</span>
|
||
Metadata:
|
||
<span class="w"> </span>database_id<span class="w"> </span>:<span class="w"> </span>TIMIT
|
||
<span class="w"> </span>database_version:<span class="w"> </span><span class="m">1</span>.0
|
||
<span class="w"> </span>utterance_id<span class="w"> </span>:<span class="w"> </span>elc0_si756
|
||
<span class="w"> </span>sample_min<span class="w"> </span>:<span class="w"> </span>-1546
|
||
<span class="w"> </span>sample_max<span class="w"> </span>:<span class="w"> </span><span class="m">1989</span>
|
||
Duration:<span class="w"> </span><span class="m">00</span>:00:04.19,<span class="w"> </span>bitrate:<span class="w"> </span><span class="m">257</span><span class="w"> </span>kb/s
|
||
<span class="w"> </span>Stream<span class="w"> </span><span class="c1">#0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s</span>
|
||
|
||
$<span class="w"> </span>ffprobe<span class="w"> </span>-show_format<span class="w"> </span>tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV
|
||
|
||
Input<span class="w"> </span><span class="c1">#0, nistsphere, from 'tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV':</span>
|
||
Metadata:
|
||
<span class="w"> </span>database_id<span class="w"> </span>:<span class="w"> </span>TIMIT
|
||
<span class="w"> </span>database_version:<span class="w"> </span><span class="m">1</span>.0
|
||
<span class="w"> </span>utterance_id<span class="w"> </span>:<span class="w"> </span>mgd0_si1564
|
||
<span class="w"> </span>sample_min<span class="w"> </span>:<span class="w"> </span>-7626
|
||
<span class="w"> </span>sample_max<span class="w"> </span>:<span class="w"> </span><span class="m">10573</span>
|
||
Duration:<span class="w"> </span><span class="m">00</span>:00:04.44,<span class="w"> </span>bitrate:<span class="w"> </span><span class="m">257</span><span class="w"> </span>kb/s
|
||
<span class="w"> </span>Stream<span class="w"> </span><span class="c1">#0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="inference-with-a-pre-trained-model">
|
||
<h3>Inference with a pre-trained model<a class="headerlink" href="#inference-with-a-pre-trained-model" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/timit/ASR
|
||
$<span class="w"> </span>./tdnn_lstm_ctc/pretrained.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>shows the usage information of <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/pretrained.py</span></code>.</p>
|
||
<p>To decode with <code class="docutils literal notranslate"><span class="pre">1best</span></code> method, we can use:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./tdnn_lstm_ctc/pretrained.py \
|
||
<span class="w"> </span>--method<span class="w"> </span>1best \
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/exp/pretrained_average_16_25.pt \
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/words.txt \
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/HLG.pt \
|
||
<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV \
|
||
<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV \
|
||
<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">49</span><span class="p">,</span><span class="mi">583</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">169</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">49</span><span class="p">,</span><span class="mi">584</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">171</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">53</span><span class="p">,</span><span class="mi">816</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">183</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span><span class="w"> </span><span class="nn">.</span><span class="o">/</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_phone</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">53</span><span class="p">,</span><span class="mi">827</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">200</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">53</span><span class="p">,</span><span class="mi">827</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">210</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV'</span><span class="p">,</span> <span class="s1">'./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV'</span><span class="p">,</span> <span class="s1">'./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">53</span><span class="p">,</span><span class="mi">831</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">216</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">54</span><span class="p">,</span><span class="mi">380</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">246</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="n">decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">54</span><span class="p">,</span><span class="mi">387</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">267</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">FDHC0_SI1559</span><span class="o">.</span><span class="n">WAV</span><span class="p">:</span>
|
||
<span class="n">sil</span> <span class="n">dh</span> <span class="n">ih</span> <span class="n">sh</span> <span class="n">uw</span> <span class="n">ah</span> <span class="n">l</span> <span class="n">iy</span> <span class="n">v</span> <span class="n">iy</span> <span class="n">z</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">p</span> <span class="n">r</span> <span class="n">aa</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">s</span> <span class="n">ih</span> <span class="n">m</span> <span class="n">ey</span> <span class="n">dx</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">d</span> <span class="n">w</span> <span class="n">uh</span> <span class="n">dx</span> <span class="n">iy</span> <span class="n">w</span> <span class="n">ih</span> <span class="n">s</span> <span class="n">f</span> <span class="n">iy</span> <span class="n">l</span> <span class="n">iy</span> <span class="n">w</span> <span class="n">ih</span> <span class="n">th</span> <span class="n">ih</span> <span class="n">n</span> <span class="n">ih</span> <span class="n">m</span> <span class="n">s</span> <span class="n">eh</span> <span class="n">l</span> <span class="n">f</span> <span class="n">sil</span> <span class="n">jh</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">FELC0_SI756</span><span class="o">.</span><span class="n">WAV</span><span class="p">:</span>
|
||
<span class="n">sil</span> <span class="n">dh</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">t</span> <span class="n">ih</span> <span class="n">r</span> <span class="n">ih</span> <span class="n">s</span> <span class="n">sil</span> <span class="n">s</span> <span class="n">er</span> <span class="n">r</span> <span class="n">ih</span> <span class="n">m</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">m</span> <span class="n">aa</span> <span class="n">l</span> <span class="n">ih</span> <span class="n">ng</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">l</span> <span class="n">ey</span> <span class="n">sil</span> <span class="n">r</span> <span class="n">eh</span> <span class="n">sil</span> <span class="n">d</span> <span class="n">w</span> <span class="n">ay</span> <span class="n">sil</span> <span class="n">d</span> <span class="n">aa</span> <span class="n">r</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ah</span> <span class="n">f</span> <span class="n">sil</span> <span class="o"><</span><span class="n">UNK</span><span class="o">></span> <span class="n">jh</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">FMGD0_SI1564</span><span class="o">.</span><span class="n">WAV</span><span class="p">:</span>
|
||
<span class="n">sil</span> <span class="n">hh</span> <span class="n">ae</span> <span class="n">z</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ih</span> <span class="n">n</span> <span class="n">iy</span> <span class="n">w</span> <span class="n">ah</span> <span class="n">z</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ae</span> <span class="n">n</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ay</span> <span class="n">s</span> <span class="n">sil</span> <span class="n">n</span> <span class="n">ey</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">eh</span> <span class="n">l</span> <span class="n">f</span> <span class="n">eh</span> <span class="n">n</span> <span class="n">s</span> <span class="n">ih</span> <span class="n">z</span> <span class="n">eh</span> <span class="n">n</span> <span class="n">dh</span> <span class="n">eh</span> <span class="n">r</span> <span class="n">w</span> <span class="n">er</span> <span class="n">sil</span> <span class="n">g</span> <span class="n">r</span> <span class="n">ey</span> <span class="n">z</span> <span class="n">ih</span> <span class="n">ng</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">ae</span> <span class="n">dx</span> <span class="n">l</span> <span class="n">sil</span>
|
||
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">21</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mi">54</span><span class="p">,</span><span class="mi">387</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">269</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>To decode with the <code class="docutils literal notranslate"><span class="pre">whole-lattice-rescoring</span></code> method, you can use:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./tdnn_lstm_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>whole-lattice-rescoring<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/exp/pretrained_average_16_25.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--G<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lm/G_4_gram.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--ngram-lm-scale<span class="w"> </span><span class="m">0</span>.08<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV
|
||
</pre></div>
|
||
</div>
|
||
<p>The decoding output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">22</span><span class="p">,</span><span class="mi">739</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">169</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">22</span><span class="p">,</span><span class="mi">739</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">171</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span><span class="mi">959</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">183</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span><span class="w"> </span><span class="nn">.</span><span class="o">/</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_phone</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span><span class="mi">971</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">191</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">G</span> <span class="kn">from</span><span class="w"> </span><span class="nn">.</span><span class="o">/</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lm</span><span class="o">/</span><span class="n">G_4_gram</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span><span class="mi">977</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">200</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span><span class="mi">978</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">210</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV'</span><span class="p">,</span> <span class="s1">'./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV'</span><span class="p">,</span> <span class="s1">'./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">26</span><span class="p">,</span><span class="mi">981</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">216</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">27</span><span class="p">,</span><span class="mi">519</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">251</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="n">decoding</span> <span class="o">+</span> <span class="n">LM</span> <span class="n">rescoring</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">08</span> <span class="mi">20</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">27</span><span class="p">,</span><span class="mi">878</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">267</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">FDHC0_SI1559</span><span class="o">.</span><span class="n">WAV</span><span class="p">:</span>
|
||
<span class="n">sil</span> <span class="n">dh</span> <span class="n">ih</span> <span class="n">sh</span> <span class="n">uw</span> <span class="n">l</span> <span class="n">iy</span> <span class="n">v</span> <span class="n">iy</span> <span class="n">z</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">p</span> <span class="n">r</span> <span class="n">aa</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">s</span> <span class="n">ah</span> <span class="n">m</span> <span class="n">ey</span> <span class="n">dx</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">w</span> <span class="n">uh</span> <span class="n">dx</span> <span class="n">iy</span> <span class="n">w</span> <span class="n">ih</span> <span class="n">s</span> <span class="n">f</span> <span class="n">iy</span> <span class="n">l</span> <span class="n">ih</span> <span class="n">ng</span> <span class="n">w</span> <span class="n">ih</span> <span class="n">th</span> <span class="n">ih</span> <span class="n">n</span> <span class="n">ih</span> <span class="n">m</span> <span class="n">s</span> <span class="n">eh</span> <span class="n">l</span> <span class="n">f</span> <span class="n">sil</span> <span class="n">jh</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">FELC0_SI756</span><span class="o">.</span><span class="n">WAV</span><span class="p">:</span>
|
||
<span class="n">sil</span> <span class="n">dh</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">t</span> <span class="n">ih</span> <span class="n">r</span> <span class="n">iy</span> <span class="n">ih</span> <span class="n">s</span> <span class="n">sil</span> <span class="n">s</span> <span class="n">er</span> <span class="n">r</span> <span class="n">eh</span> <span class="n">m</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">n</span> <span class="n">ah</span> <span class="n">l</span> <span class="n">ih</span> <span class="n">ng</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">l</span> <span class="n">ey</span> <span class="n">sil</span> <span class="n">r</span> <span class="n">eh</span> <span class="n">sil</span> <span class="n">d</span> <span class="n">w</span> <span class="n">ay</span> <span class="n">sil</span> <span class="n">d</span> <span class="n">aa</span> <span class="n">r</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ow</span> <span class="n">f</span> <span class="n">sil</span> <span class="n">jh</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">-</span><span class="n">lstm</span><span class="o">/</span><span class="n">icefall_asr_timit_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">FMGD0_SI1564</span><span class="o">.</span><span class="n">WAV</span><span class="p">:</span>
|
||
<span class="n">sil</span> <span class="n">hh</span> <span class="n">ah</span> <span class="n">z</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ih</span> <span class="n">n</span> <span class="n">iy</span> <span class="n">w</span> <span class="n">ah</span> <span class="n">z</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ae</span> <span class="n">n</span> <span class="n">ih</span> <span class="n">sil</span> <span class="n">b</span> <span class="n">ay</span> <span class="n">s</span> <span class="n">sil</span> <span class="n">n</span> <span class="n">ey</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">ih</span> <span class="n">l</span> <span class="n">f</span> <span class="n">eh</span> <span class="n">n</span> <span class="n">s</span> <span class="n">ih</span> <span class="n">z</span> <span class="n">eh</span> <span class="n">n</span> <span class="n">dh</span> <span class="n">eh</span> <span class="n">r</span> <span class="n">w</span> <span class="n">er</span> <span class="n">sil</span> <span class="n">g</span> <span class="n">r</span> <span class="n">ey</span> <span class="n">z</span> <span class="n">ih</span> <span class="n">n</span> <span class="n">sil</span> <span class="n">k</span> <span class="n">ae</span> <span class="n">dx</span> <span class="n">l</span> <span class="n">sil</span>
|
||
</pre></div>
|
||
</div>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>2021-11-08 20:05:27,878 INFO [pretrained.py:269] Decoding Done</pre></div></div>
|
||
</section>
|
||
</section>
|
||
<section id="colab-notebook">
|
||
<h2>Colab notebook<a class="headerlink" href="#colab-notebook" title="Permalink to this heading"></a></h2>
|
||
<p>We provide a Colab notebook for decoding with the pre-trained model.</p>
|
||
<p><a class="reference external" href="https://colab.research.google.com/drive/1Hs9DA4V96uapw_30uNp32OMJgkuR5VVd"><img alt="timit tdnn_lstm_ctc colab notebook" src="https://colab.research.google.com/assets/colab-badge.svg" /></a></p>
|
||
<p><strong>Congratulations!</strong> You have finished the TDNN-LSTM-CTC recipe on timit in <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||
<a href="tdnn_ligru_ctc.html" class="btn btn-neutral float-left" title="TDNN-LiGRU-CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||
<a href="../yesno/index.html" class="btn btn-neutral float-right" title="YesNo" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||
</div>
|
||
|
||
<hr/>
|
||
|
||
<div role="contentinfo">
|
||
<p>© Copyright 2021, icefall development team.</p>
|
||
</div>
|
||
|
||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||
|
||
|
||
</footer>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<script>
|
||
jQuery(function () {
|
||
SphinxRtdTheme.Navigation.enable(true);
|
||
});
|
||
</script>
|
||
|
||
</body>
|
||
</html> |