mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
1142 lines
225 KiB
HTML
1142 lines
225 KiB
HTML
<!DOCTYPE html>
|
||
<html class="writer-html5" lang="en" >
|
||
<head>
|
||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<title>Conformer CTC — icefall 0.1 documentation</title>
|
||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
|
||
<!--[if lt IE 9]>
|
||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||
<![endif]-->
|
||
|
||
<script src="../../../_static/jquery.js"></script>
|
||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||
<script src="../../../_static/doctools.js"></script>
|
||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||
<script src="../../../_static/js/theme.js"></script>
|
||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||
<link rel="search" title="Search" href="../../../search.html" />
|
||
<link rel="next" title="Pruned transducer statelessX" href="pruned_transducer_stateless.html" />
|
||
<link rel="prev" title="TDNN-LSTM-CTC" href="tdnn_lstm_ctc.html" />
|
||
</head>
|
||
|
||
<body class="wy-body-for-nav">
|
||
<div class="wy-grid-for-nav">
|
||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||
<div class="wy-side-scroll">
|
||
<div class="wy-side-nav-search" >
|
||
|
||
|
||
|
||
<a href="../../../index.html" class="icon icon-home">
|
||
icefall
|
||
</a>
|
||
<div role="search">
|
||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../installation/index.html">Installation</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../docker/index.html">Docker</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../faqs.html">Frequently Asked Questions (FAQs)</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../model-export/index.html">Model export</a></li>
|
||
</ul>
|
||
<ul class="current">
|
||
<li class="toctree-l1 current"><a class="reference internal" href="../../index.html">Recipes</a><ul class="current">
|
||
<li class="toctree-l2 current"><a class="reference internal" href="../index.html">Non Streaming ASR</a><ul class="current">
|
||
<li class="toctree-l3"><a class="reference internal" href="../aishell/index.html">aishell</a></li>
|
||
<li class="toctree-l3 current"><a class="reference internal" href="index.html">LibriSpeech</a><ul class="current">
|
||
<li class="toctree-l4"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM-CTC</a></li>
|
||
<li class="toctree-l4 current"><a class="current reference internal" href="#">Conformer CTC</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="pruned_transducer_stateless.html">Pruned transducer statelessX</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="zipformer_mmi.html">Zipformer MMI</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="zipformer_ctc_blankskip.html">Zipformer CTC Blank Skip</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="distillation.html">Distillation with HuBERT</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../yesno/index.html">YesNo</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../Streaming-ASR/index.html">Streaming ASR</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/index.html">Contributing</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../huggingface/index.html">Huggingface</a></li>
|
||
</ul>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../decoding-with-langugage-models/index.html">Decoding with language models</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</nav>
|
||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||
<a href="../../../index.html">icefall</a>
|
||
</nav>
|
||
|
||
<div class="wy-nav-content">
|
||
<div class="rst-content">
|
||
<div role="navigation" aria-label="Page navigation">
|
||
<ul class="wy-breadcrumbs">
|
||
<li><a href="../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
||
<li class="breadcrumb-item"><a href="../../index.html">Recipes</a></li>
|
||
<li class="breadcrumb-item"><a href="../index.html">Non Streaming ASR</a></li>
|
||
<li class="breadcrumb-item"><a href="index.html">LibriSpeech</a></li>
|
||
<li class="breadcrumb-item active">Conformer CTC</li>
|
||
<li class="wy-breadcrumbs-aside">
|
||
<a href="https://github.com/k2-fsa/icefall/blob/master/docs/source/recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst" class="fa fa-github"> Edit on GitHub</a>
|
||
</li>
|
||
</ul>
|
||
<hr/>
|
||
</div>
|
||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||
<div itemprop="articleBody">
|
||
|
||
<section id="conformer-ctc">
|
||
<h1>Conformer CTC<a class="headerlink" href="#conformer-ctc" title="Permalink to this heading"></a></h1>
|
||
<p>This tutorial shows you how to run a conformer ctc model
|
||
with the <a class="reference external" href="https://www.openslr.org/12">LibriSpeech</a> dataset.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>We assume you have read the page <a class="reference internal" href="../../../installation/index.html#install-icefall"><span class="std std-ref">Installation</span></a> and have set up
|
||
the environment for <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||
</div>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>We recommend that you use a GPU or several GPUs to run this recipe.</p>
|
||
</div>
|
||
<p>In this tutorial, you will learn:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><ol class="arabic simple">
|
||
<li><p>How to prepare data for training and decoding</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="2">
|
||
<li><p>How to start the training, either with a single GPU or multiple GPUs</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="3">
|
||
<li><p>How to do decoding after training, with n-gram LM rescoring and attention decoder rescoring</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="4">
|
||
<li><p>How to use a pre-trained model, provided by us</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="5">
|
||
<li><p>How to deploy your trained model in C++, without Python dependencies</p></li>
|
||
</ol>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<section id="data-preparation">
|
||
<h2>Data preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h2>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./prepare.sh
|
||
</pre></div>
|
||
</div>
|
||
<p>The script <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> handles the data preparation for you, <strong>automagically</strong>.
|
||
All you need to do is to run it.</p>
|
||
<p>The data preparation contains several stages. You can use the following two
|
||
options:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--stage</span></code></p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--stop-stage</span></code></p></li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>to control which stage(s) should be run. By default, all stages are executed.</p>
|
||
<p>For example,</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./prepare.sh<span class="w"> </span>--stage<span class="w"> </span><span class="m">0</span><span class="w"> </span>--stop-stage<span class="w"> </span><span class="m">0</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>means to run only stage 0.</p>
|
||
<p>To run stage 2 to stage 5, use:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./prepare.sh<span class="w"> </span>--stage<span class="w"> </span><span class="m">2</span><span class="w"> </span>--stop-stage<span class="w"> </span><span class="m">5</span>
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>If you have pre-downloaded the <a class="reference external" href="https://www.openslr.org/12">LibriSpeech</a>
|
||
dataset and the <a class="reference external" href="http://www.openslr.org/17/">musan</a> dataset, say,
|
||
they are saved in <code class="docutils literal notranslate"><span class="pre">/tmp/LibriSpeech</span></code> and <code class="docutils literal notranslate"><span class="pre">/tmp/musan</span></code>, you can modify
|
||
the <code class="docutils literal notranslate"><span class="pre">dl_dir</span></code> variable in <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> to point to <code class="docutils literal notranslate"><span class="pre">/tmp</span></code> so that
|
||
<code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> won’t re-download them.</p>
|
||
</div>
|
||
<div class="admonition note">
|
||
<p class="admonition-title">Note</p>
|
||
<p>All files generated by <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code>, e.g., features, lexicon, etc.,
|
||
are saved in <code class="docutils literal notranslate"><span class="pre">./data</span></code> directory.</p>
|
||
</div>
|
||
<p>We provide the following YouTube video showing how to run <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code>.</p>
|
||
<div class="admonition note">
|
||
<p class="admonition-title">Note</p>
|
||
<p>To get the latest news of <a class="reference external" href="https://github.com/k2-fsa">next-gen Kaldi</a>, please subscribe to
|
||
the following YouTube channel by <a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">Nadira Povey</a>:</p>
|
||
<blockquote>
|
||
<div><p><a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw</a></p>
|
||
</div></blockquote>
|
||
</div>
|
||
<div class="video_wrapper" style="">
|
||
<iframe allowfullscreen="true" src="https://www.youtube.com/embed/ofEIoJL-mGM" style="border: 0; height: 345px; width: 560px">
|
||
</iframe></div></section>
|
||
<section id="training">
|
||
<h2>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h2>
|
||
<section id="configurable-options">
|
||
<h3>Configurable options<a class="headerlink" href="#configurable-options" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>shows you the training options that can be passed from the command line.
|
||
The following options are used quite often:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--full-libri</span></code></p>
|
||
<p>If it’s True, the training part uses all the training data, i.e.,
|
||
960 hours. Otherwise, the training part uses only the subset
|
||
<code class="docutils literal notranslate"><span class="pre">train-clean-100</span></code>, which has 100 hours of training data.</p>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>The training set is perturbed by speed with two factors: 0.9 and 1.1.
|
||
If <code class="docutils literal notranslate"><span class="pre">--full-libri</span></code> is True, each epoch actually processes
|
||
<code class="docutils literal notranslate"><span class="pre">3x960</span> <span class="pre">==</span> <span class="pre">2880</span></code> hours of data.</p>
|
||
</div>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--num-epochs</span></code></p>
|
||
<p>It is the number of epochs to train. For instance,
|
||
<code class="docutils literal notranslate"><span class="pre">./conformer_ctc/train.py</span> <span class="pre">--num-epochs</span> <span class="pre">30</span></code> trains for 30 epochs
|
||
and generates <code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …, <code class="docutils literal notranslate"><span class="pre">epoch-29.pt</span></code>
|
||
in the folder <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/exp</span></code>.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--start-epoch</span></code></p>
|
||
<p>It’s used to resume training.
|
||
<code class="docutils literal notranslate"><span class="pre">./conformer_ctc/train.py</span> <span class="pre">--start-epoch</span> <span class="pre">10</span></code> loads the
|
||
checkpoint <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/exp/epoch-9.pt</span></code> and starts
|
||
training from epoch 10, based on the state from epoch 9.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--world-size</span></code></p>
|
||
<p>It is used for multi-GPU single-machine DDP training.</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><ol class="loweralpha simple">
|
||
<li><p>If it is 1, then no DDP training is used.</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="loweralpha simple" start="2">
|
||
<li><p>If it is 2, then GPU 0 and GPU 1 are used for DDP training.</p></li>
|
||
</ol>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>The following shows some use cases with it.</p>
|
||
<blockquote>
|
||
<div><p><strong>Use case 1</strong>: You have 4 GPUs, but you only want to use GPU 0 and
|
||
GPU 2 for training. You can do the following:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0,2"</span>
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">2</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p><strong>Use case 2</strong>: You have 4 GPUs and you want to use all of them
|
||
for training. You can do the following:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">4</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p><strong>Use case 3</strong>: You have 4 GPUs but you only want to use GPU 3
|
||
for training. You can do the following:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"3"</span>
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">1</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</div></blockquote>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>Only multi-GPU single-machine DDP training is implemented at present.
|
||
Multi-GPU multi-machine DDP training will be added later.</p>
|
||
</div>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
|
||
<p>It specifies the number of seconds over all utterances in a
|
||
batch, before <strong>padding</strong>.
|
||
If you encounter CUDA OOM, please reduce it. For instance, if
|
||
you are using an NVIDIA V100 GPU, we recommend that you set it to <code class="docutils literal notranslate"><span class="pre">200</span></code>.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>Due to padding, the number of seconds of all utterances in a
|
||
batch will usually be larger than <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code>.</p>
|
||
<p>A larger value for <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code> may cause OOM during training,
|
||
while a smaller value may increase the training time. You have to
|
||
tune it.</p>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="pre-configured-options">
|
||
<h3>Pre-configured options<a class="headerlink" href="#pre-configured-options" title="Permalink to this heading"></a></h3>
|
||
<p>There are some training options, e.g., weight decay,
|
||
number of warmup steps, results dir, etc.,
|
||
that are not passed from the command line.
|
||
They are pre-configured by the function <code class="docutils literal notranslate"><span class="pre">get_params()</span></code> in
|
||
<a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/train.py">conformer_ctc/train.py</a>.</p>
|
||
<p>You don’t need to change these pre-configured parameters. If you really need to change
|
||
them, please modify <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/train.py</span></code> directly.</p>
|
||
</section>
|
||
<section id="training-logs">
|
||
<h3>Training logs<a class="headerlink" href="#training-logs" title="Permalink to this heading"></a></h3>
|
||
<p>Training logs and checkpoints are saved in <code class="docutils literal notranslate"><span class="pre">conformer_ctc/exp</span></code>.
|
||
You will find the following files in that directory:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …</p>
|
||
<p>These are checkpoint files, containing model <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> and optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>.
|
||
To resume training from some checkpoint, say <code class="docutils literal notranslate"><span class="pre">epoch-10.pt</span></code>, you can use:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--start-epoch<span class="w"> </span><span class="m">11</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">tensorboard/</span></code></p>
|
||
<p>This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||
rate, etc., are recorded in these logs. You can visualize them with:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>conformer_ctc/exp/tensorboard
|
||
$<span class="w"> </span>tensorboard<span class="w"> </span>dev<span class="w"> </span>upload<span class="w"> </span>--logdir<span class="w"> </span>.<span class="w"> </span>--description<span class="w"> </span><span class="s2">"Conformer CTC training for LibriSpeech with icefall"</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>It will print something like below:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">TensorFlow</span> <span class="n">installation</span> <span class="ow">not</span> <span class="n">found</span> <span class="o">-</span> <span class="n">running</span> <span class="k">with</span> <span class="n">reduced</span> <span class="n">feature</span> <span class="nb">set</span><span class="o">.</span>
|
||
<span class="n">Upload</span> <span class="n">started</span> <span class="ow">and</span> <span class="n">will</span> <span class="k">continue</span> <span class="n">reading</span> <span class="nb">any</span> <span class="n">new</span> <span class="n">data</span> <span class="k">as</span> <span class="n">it</span><span class="s1">'s added to the logdir.</span>
|
||
|
||
<span class="n">To</span> <span class="n">stop</span> <span class="n">uploading</span><span class="p">,</span> <span class="n">press</span> <span class="n">Ctrl</span><span class="o">-</span><span class="n">C</span><span class="o">.</span>
|
||
|
||
<span class="n">New</span> <span class="n">experiment</span> <span class="n">created</span><span class="o">.</span> <span class="n">View</span> <span class="n">your</span> <span class="n">TensorBoard</span> <span class="n">at</span><span class="p">:</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">tensorboard</span><span class="o">.</span><span class="n">dev</span><span class="o">/</span><span class="n">experiment</span><span class="o">/</span><span class="n">lzGnETjwRxC3yghNMd4kPw</span><span class="o">/</span>
|
||
|
||
<span class="p">[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">08</span><span class="o">-</span><span class="mi">24</span><span class="n">T16</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">43</span><span class="p">]</span> <span class="n">Started</span> <span class="n">scanning</span> <span class="n">logdir</span><span class="o">.</span>
|
||
<span class="n">Uploading</span> <span class="mi">4540</span> <span class="n">scalars</span><span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>Note there is a URL in the above output. Click it and you will see
|
||
the following screenshot:</p>
|
||
<blockquote>
|
||
<div><figure class="align-center" id="id6">
|
||
<a class="reference external image-reference" href="https://tensorboard.dev/experiment/lzGnETjwRxC3yghNMd4kPw/"><img alt="TensorBoard screenshot" src="../../../_images/librispeech-conformer-ctc-tensorboard-log.png" style="width: 600px;" /></a>
|
||
<figcaption>
|
||
<p><span class="caption-number">Fig. 4 </span><span class="caption-text">TensorBoard screenshot.</span><a class="headerlink" href="#id6" title="Permalink to this image"></a></p>
|
||
</figcaption>
|
||
</figure>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">log/log-train-xxxx</span></code></p>
|
||
<p>It is the detailed training log in text format, same as the one
|
||
you saw printed to the console during training.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="usage-examples">
|
||
<h3>Usage examples<a class="headerlink" href="#usage-examples" title="Permalink to this heading"></a></h3>
|
||
<p>The following shows typical use cases:</p>
|
||
<section id="case-1">
|
||
<h4><strong>Case 1</strong><a class="headerlink" href="#case-1" title="Permalink to this heading"></a></h4>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--max-duration<span class="w"> </span><span class="m">200</span><span class="w"> </span>--full-libri<span class="w"> </span><span class="m">0</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It uses <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code> of 200 to avoid OOM. Also, it uses only
|
||
a subset of the LibriSpeech data for training.</p>
|
||
</section>
|
||
<section id="case-2">
|
||
<h4><strong>Case 2</strong><a class="headerlink" href="#case-2" title="Permalink to this heading"></a></h4>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0,3"</span>
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">2</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It uses GPU 0 and GPU 3 for DDP training.</p>
|
||
</section>
|
||
<section id="case-3">
|
||
<h4><strong>Case 3</strong><a class="headerlink" href="#case-3" title="Permalink to this heading"></a></h4>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--num-epochs<span class="w"> </span><span class="m">10</span><span class="w"> </span>--start-epoch<span class="w"> </span><span class="m">3</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It loads checkpoint <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/exp/epoch-2.pt</span></code> and starts
|
||
training from epoch 3. Also, it trains for 10 epochs.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="decoding">
|
||
<h2>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h2>
|
||
<p>The decoding part uses checkpoints saved by the training part, so you have
|
||
to run the training part first.</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/decode.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>shows the options for decoding.</p>
|
||
<p>The commonly used options are:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--method</span></code></p>
|
||
<p>This specifies the decoding method. This script supports 7 decoding methods.
|
||
As for ctc decoding, it uses a sentence piece model to convert word pieces to words.
|
||
And it needs neither a lexicon nor an n-gram LM.</p>
|
||
<p>For example, the following command uses CTC topology for decoding:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/librispeech/ASR
|
||
$ ./conformer_ctc/decode.py --method ctc-decoding --max-duration 300
|
||
# Caution: The above command is tested with a model with vocab size 500.
|
||
</pre></div>
|
||
</div>
|
||
<p>And the following command uses attention decoder for rescoring:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/librispeech/ASR
|
||
$ ./conformer_ctc/decode.py --method attention-decoder --max-duration 30 --nbest-scale 0.5
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--nbest-scale</span></code></p>
|
||
<p>It is used to scale down lattice scores so that there are more unique
|
||
paths for rescoring.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
|
||
<p>It has the same meaning as the one during training. A larger
|
||
value may cause OOM.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>Here are some results for CTC decoding with a vocab size of 500:</p>
|
||
<p>Usage:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
<span class="c1"># NOTE: Tested with a model with vocab size 500.</span>
|
||
<span class="c1"># It won't work for a model with vocab size 5000.</span>
|
||
$<span class="w"> </span>./conformer_ctc/decode.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--epoch<span class="w"> </span><span class="m">25</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--avg<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--max-duration<span class="w"> </span><span class="m">300</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--exp-dir<span class="w"> </span>conformer_ctc/exp<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--lang-dir<span class="w"> </span>data/lang_bpe_500<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>ctc-decoding
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is given below:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:31,033<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:537<span class="o">]</span><span class="w"> </span>Decoding<span class="w"> </span>started
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:31,033<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:538<span class="o">]</span>
|
||
<span class="o">{</span><span class="s1">'lm_dir'</span>:<span class="w"> </span>PosixPath<span class="o">(</span><span class="s1">'data/lm'</span><span class="o">)</span>,<span class="w"> </span><span class="s1">'subsampling_factor'</span>:<span class="w"> </span><span class="m">4</span>,<span class="w"> </span><span class="s1">'vgg_frontend'</span>:<span class="w"> </span>False,<span class="w"> </span><span class="s1">'use_feat_batchnorm'</span>:<span class="w"> </span>True,
|
||
<span class="s1">'feature_dim'</span>:<span class="w"> </span><span class="m">80</span>,<span class="w"> </span><span class="s1">'nhead'</span>:<span class="w"> </span><span class="m">8</span>,<span class="w"> </span><span class="s1">'attention_dim'</span>:<span class="w"> </span><span class="m">512</span>,<span class="w"> </span><span class="s1">'num_decoder_layers'</span>:<span class="w"> </span><span class="m">6</span>,<span class="w"> </span><span class="s1">'search_beam'</span>:<span class="w"> </span><span class="m">20</span>,<span class="w"> </span><span class="s1">'output_beam'</span>:<span class="w"> </span><span class="m">8</span>,
|
||
<span class="s1">'min_active_states'</span>:<span class="w"> </span><span class="m">30</span>,<span class="w"> </span><span class="s1">'max_active_states'</span>:<span class="w"> </span><span class="m">10000</span>,<span class="w"> </span><span class="s1">'use_double_scores'</span>:<span class="w"> </span>True,
|
||
<span class="s1">'epoch'</span>:<span class="w"> </span><span class="m">25</span>,<span class="w"> </span><span class="s1">'avg'</span>:<span class="w"> </span><span class="m">1</span>,<span class="w"> </span><span class="s1">'method'</span>:<span class="w"> </span><span class="s1">'ctc-decoding'</span>,<span class="w"> </span><span class="s1">'num_paths'</span>:<span class="w"> </span><span class="m">100</span>,<span class="w"> </span><span class="s1">'nbest_scale'</span>:<span class="w"> </span><span class="m">0</span>.5,
|
||
<span class="s1">'export'</span>:<span class="w"> </span>False,<span class="w"> </span><span class="s1">'exp_dir'</span>:<span class="w"> </span>PosixPath<span class="o">(</span><span class="s1">'conformer_ctc/exp'</span><span class="o">)</span>,<span class="w"> </span><span class="s1">'lang_dir'</span>:<span class="w"> </span>PosixPath<span class="o">(</span><span class="s1">'data/lang_bpe_500'</span><span class="o">)</span>,<span class="w"> </span><span class="s1">'full_libri'</span>:<span class="w"> </span>False,
|
||
<span class="s1">'feature_dir'</span>:<span class="w"> </span>PosixPath<span class="o">(</span><span class="s1">'data/fbank'</span><span class="o">)</span>,<span class="w"> </span><span class="s1">'max_duration'</span>:<span class="w"> </span><span class="m">100</span>,<span class="w"> </span><span class="s1">'bucketing_sampler'</span>:<span class="w"> </span>False,<span class="w"> </span><span class="s1">'num_buckets'</span>:<span class="w"> </span><span class="m">30</span>,
|
||
<span class="s1">'concatenate_cuts'</span>:<span class="w"> </span>False,<span class="w"> </span><span class="s1">'duration_factor'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'gap'</span>:<span class="w"> </span><span class="m">1</span>.0,<span class="w"> </span><span class="s1">'on_the_fly_feats'</span>:<span class="w"> </span>False,
|
||
<span class="s1">'shuffle'</span>:<span class="w"> </span>True,<span class="w"> </span><span class="s1">'return_cuts'</span>:<span class="w"> </span>True,<span class="w"> </span><span class="s1">'num_workers'</span>:<span class="w"> </span><span class="m">2</span><span class="o">}</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:31,406<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>lexicon.py:113<span class="o">]</span><span class="w"> </span>Loading<span class="w"> </span>pre-compiled<span class="w"> </span>data/lang_bpe_500/Linv.pt
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:31,464<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:548<span class="o">]</span><span class="w"> </span>device:<span class="w"> </span>cuda:0
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:36,171<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>checkpoint.py:92<span class="o">]</span><span class="w"> </span>Loading<span class="w"> </span>checkpoint<span class="w"> </span>from<span class="w"> </span>conformer_ctc/exp/epoch-25.pt
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:36,776<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:652<span class="o">]</span><span class="w"> </span>Number<span class="w"> </span>of<span class="w"> </span>model<span class="w"> </span>parameters:<span class="w"> </span><span class="m">109226120</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:44:37,714<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:473<span class="o">]</span><span class="w"> </span>batch<span class="w"> </span><span class="m">0</span>/206,<span class="w"> </span>cuts<span class="w"> </span>processed<span class="w"> </span><span class="k">until</span><span class="w"> </span>now<span class="w"> </span>is<span class="w"> </span><span class="m">12</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:15,944<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:473<span class="o">]</span><span class="w"> </span>batch<span class="w"> </span><span class="m">100</span>/206,<span class="w"> </span>cuts<span class="w"> </span>processed<span class="w"> </span><span class="k">until</span><span class="w"> </span>now<span class="w"> </span>is<span class="w"> </span><span class="m">1328</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:54,443<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:473<span class="o">]</span><span class="w"> </span>batch<span class="w"> </span><span class="m">200</span>/206,<span class="w"> </span>cuts<span class="w"> </span>processed<span class="w"> </span><span class="k">until</span><span class="w"> </span>now<span class="w"> </span>is<span class="w"> </span><span class="m">2563</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:56,411<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:494<span class="o">]</span><span class="w"> </span>The<span class="w"> </span>transcripts<span class="w"> </span>are<span class="w"> </span>stored<span class="w"> </span><span class="k">in</span><span class="w"> </span>conformer_ctc/exp/recogs-test-clean-ctc-decoding.txt
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:56,592<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>utils.py:331<span class="o">]</span><span class="w"> </span><span class="o">[</span>test-clean-ctc-decoding<span class="o">]</span><span class="w"> </span>%WER<span class="w"> </span><span class="m">3</span>.26%<span class="w"> </span><span class="o">[</span><span class="m">1715</span><span class="w"> </span>/<span class="w"> </span><span class="m">52576</span>,<span class="w"> </span><span class="m">163</span><span class="w"> </span>ins,<span class="w"> </span><span class="m">128</span><span class="w"> </span>del,<span class="w"> </span><span class="m">1424</span><span class="w"> </span>sub<span class="w"> </span><span class="o">]</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:56,807<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:506<span class="o">]</span><span class="w"> </span>Wrote<span class="w"> </span>detailed<span class="w"> </span>error<span class="w"> </span>stats<span class="w"> </span>to<span class="w"> </span>conformer_ctc/exp/errs-test-clean-ctc-decoding.txt
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:56,808<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:522<span class="o">]</span>
|
||
For<span class="w"> </span>test-clean,<span class="w"> </span>WER<span class="w"> </span>of<span class="w"> </span>different<span class="w"> </span>settings<span class="w"> </span>are:
|
||
ctc-decoding<span class="w"> </span><span class="m">3</span>.26<span class="w"> </span>best<span class="w"> </span><span class="k">for</span><span class="w"> </span>test-clean
|
||
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:45:57,362<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:473<span class="o">]</span><span class="w"> </span>batch<span class="w"> </span><span class="m">0</span>/203,<span class="w"> </span>cuts<span class="w"> </span>processed<span class="w"> </span><span class="k">until</span><span class="w"> </span>now<span class="w"> </span>is<span class="w"> </span><span class="m">15</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:46:35,565<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:473<span class="o">]</span><span class="w"> </span>batch<span class="w"> </span><span class="m">100</span>/203,<span class="w"> </span>cuts<span class="w"> </span>processed<span class="w"> </span><span class="k">until</span><span class="w"> </span>now<span class="w"> </span>is<span class="w"> </span><span class="m">1477</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:47:15,106<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:473<span class="o">]</span><span class="w"> </span>batch<span class="w"> </span><span class="m">200</span>/203,<span class="w"> </span>cuts<span class="w"> </span>processed<span class="w"> </span><span class="k">until</span><span class="w"> </span>now<span class="w"> </span>is<span class="w"> </span><span class="m">2922</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:47:16,131<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:494<span class="o">]</span><span class="w"> </span>The<span class="w"> </span>transcripts<span class="w"> </span>are<span class="w"> </span>stored<span class="w"> </span><span class="k">in</span><span class="w"> </span>conformer_ctc/exp/recogs-test-other-ctc-decoding.txt
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:47:16,208<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>utils.py:331<span class="o">]</span><span class="w"> </span><span class="o">[</span>test-other-ctc-decoding<span class="o">]</span><span class="w"> </span>%WER<span class="w"> </span><span class="m">8</span>.21%<span class="w"> </span><span class="o">[</span><span class="m">4295</span><span class="w"> </span>/<span class="w"> </span><span class="m">52343</span>,<span class="w"> </span><span class="m">396</span><span class="w"> </span>ins,<span class="w"> </span><span class="m">315</span><span class="w"> </span>del,<span class="w"> </span><span class="m">3584</span><span class="w"> </span>sub<span class="w"> </span><span class="o">]</span>
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:47:16,432<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:506<span class="o">]</span><span class="w"> </span>Wrote<span class="w"> </span>detailed<span class="w"> </span>error<span class="w"> </span>stats<span class="w"> </span>to<span class="w"> </span>conformer_ctc/exp/errs-test-other-ctc-decoding.txt
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:47:16,432<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:522<span class="o">]</span>
|
||
For<span class="w"> </span>test-other,<span class="w"> </span>WER<span class="w"> </span>of<span class="w"> </span>different<span class="w"> </span>settings<span class="w"> </span>are:
|
||
ctc-decoding<span class="w"> </span><span class="m">8</span>.21<span class="w"> </span>best<span class="w"> </span><span class="k">for</span><span class="w"> </span>test-other
|
||
|
||
<span class="m">2021</span>-09-26<span class="w"> </span><span class="m">12</span>:47:16,433<span class="w"> </span>INFO<span class="w"> </span><span class="o">[</span>decode.py:680<span class="o">]</span><span class="w"> </span>Done!
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="pre-trained-model">
|
||
<h2>Pre-trained Model<a class="headerlink" href="#pre-trained-model" title="Permalink to this heading"></a></h2>
|
||
<p>We have uploaded a pre-trained model to
|
||
<a class="reference external" href="https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09">https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09</a></p>
|
||
<p>We describe how to use the pre-trained model to transcribe a sound file or
|
||
multiple sound files in the following sections.</p>
|
||
<section id="install-kaldifeat">
|
||
<h3>Install kaldifeat<a class="headerlink" href="#install-kaldifeat" title="Permalink to this heading"></a></h3>
|
||
<p><a class="reference external" href="https://github.com/csukuangfj/kaldifeat">kaldifeat</a> is used to
|
||
extract features for a single sound file or multiple sound files
|
||
at the same time.</p>
|
||
<p>Please refer to <a class="reference external" href="https://github.com/csukuangfj/kaldifeat">https://github.com/csukuangfj/kaldifeat</a> for installation.</p>
|
||
</section>
|
||
<section id="download-the-pre-trained-model">
|
||
<h3>Download the pre-trained model<a class="headerlink" href="#download-the-pre-trained-model" title="Permalink to this heading"></a></h3>
|
||
<p>The following commands describe how to download the pre-trained model:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>git<span class="w"> </span>clone<span class="w"> </span>https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
|
||
$<span class="w"> </span>git<span class="w"> </span>lfs<span class="w"> </span>pull
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>You have to use <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">lfs</span> <span class="pre">pull</span></code> to download the pre-trained model.
|
||
Otherwise, you will encounter the following error when running <code class="docutils literal notranslate"><span class="pre">decode.py</span></code>:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">_pickle</span><span class="o">.</span><span class="n">UnpicklingError</span><span class="p">:</span> <span class="n">invalid</span> <span class="n">load</span> <span class="n">key</span><span class="p">,</span> <span class="s1">'v'</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>To fix that issue, please use:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span><span class="w"> </span>icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
|
||
git<span class="w"> </span>lfs<span class="w"> </span>pull
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>In order to use this pre-trained model, your k2 version has to be v1.9 or later.</p>
|
||
</div>
|
||
<p>After downloading, you will have the following files:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>tree<span class="w"> </span>icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
|
||
</pre></div>
|
||
</div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
|
||
<span class="p">|</span>--<span class="w"> </span>README.md
|
||
<span class="p">|</span>--<span class="w"> </span>data
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>lang_bpe_500
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>HLG.pt
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>HLG_modified.pt
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>bpe.model
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>tokens.txt
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>words.txt
|
||
<span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>lm
|
||
<span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>G_4_gram.pt
|
||
<span class="p">|</span>--<span class="w"> </span>exp
|
||
<span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>cpu_jit.pt
|
||
<span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>pretrained.pt
|
||
<span class="p">|</span>--<span class="w"> </span>log
|
||
<span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>log-decode-2021-11-09-17-38-28
|
||
<span class="sb">`</span>--<span class="w"> </span>test_wavs
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span><span class="m">1089</span>-134686-0001.wav
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span><span class="m">1221</span>-135766-0001.wav
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span><span class="m">1221</span>-135766-0002.wav
|
||
<span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>trans.txt
|
||
</pre></div>
|
||
</div>
|
||
<dl>
|
||
<dt><strong>File descriptions</strong>:</dt><dd><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_bpe_500/HLG.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It is the decoding graph.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_bpe_500/HLG_modified.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It uses a modified CTC topology while building HLG.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_bpe_500/bpe.model</span></code></p>
|
||
<blockquote>
|
||
<div><p>It is a sentencepiece model. You can use it to reproduce our results.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_bpe_500/tokens.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains tokens and their IDs, generated from <code class="docutils literal notranslate"><span class="pre">bpe.model</span></code>.
|
||
Provided only for convenience so that you can look up the SOS/EOS ID easily.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_bpe_500/words.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains words and their IDs.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lm/G_4_gram.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It is a 4-gram LM, used for n-gram LM rescoring.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp/pretrained.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains pre-trained model parameters, obtained by averaging
|
||
checkpoints from <code class="docutils literal notranslate"><span class="pre">epoch-23.pt</span></code> to <code class="docutils literal notranslate"><span class="pre">epoch-77.pt</span></code>.
|
||
Note: We have removed the optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> to reduce the file size.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp/cpu_jit.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains a TorchScript model that can be deployed in C++.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">test_wavs/*.wav</span></code></p>
|
||
<blockquote>
|
||
<div><p>These are some test sound files from the LibriSpeech <code class="docutils literal notranslate"><span class="pre">test-clean</span></code> dataset.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">test_wavs/trans.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains the reference transcripts for the sound files in <code class="docutils literal notranslate"><span class="pre">test_wavs/</span></code>.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
</ul>
|
||
</dd>
|
||
</dl>
|
||
<p>Information about the test sound files is listed below:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>soxi<span class="w"> </span>icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/*.wav
|
||
|
||
Input<span class="w"> </span>File<span class="w"> </span>:<span class="w"> </span><span class="s1">'icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span>
|
||
Channels<span class="w"> </span>:<span class="w"> </span><span class="m">1</span>
|
||
Sample<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span><span class="m">16000</span>
|
||
Precision<span class="w"> </span>:<span class="w"> </span><span class="m">16</span>-bit
|
||
Duration<span class="w"> </span>:<span class="w"> </span><span class="m">00</span>:00:06.62<span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">106000</span><span class="w"> </span>samples<span class="w"> </span>~<span class="w"> </span><span class="m">496</span>.875<span class="w"> </span>CDDA<span class="w"> </span>sectors
|
||
File<span class="w"> </span>Size<span class="w"> </span>:<span class="w"> </span>212k
|
||
Bit<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span>256k
|
||
Sample<span class="w"> </span>Encoding:<span class="w"> </span><span class="m">16</span>-bit<span class="w"> </span>Signed<span class="w"> </span>Integer<span class="w"> </span>PCM
|
||
|
||
|
||
Input<span class="w"> </span>File<span class="w"> </span>:<span class="w"> </span><span class="s1">'icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span>
|
||
Channels<span class="w"> </span>:<span class="w"> </span><span class="m">1</span>
|
||
Sample<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span><span class="m">16000</span>
|
||
Precision<span class="w"> </span>:<span class="w"> </span><span class="m">16</span>-bit
|
||
Duration<span class="w"> </span>:<span class="w"> </span><span class="m">00</span>:00:16.71<span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">267440</span><span class="w"> </span>samples<span class="w"> </span>~<span class="w"> </span><span class="m">1253</span>.62<span class="w"> </span>CDDA<span class="w"> </span>sectors
|
||
File<span class="w"> </span>Size<span class="w"> </span>:<span class="w"> </span>535k
|
||
Bit<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span>256k
|
||
Sample<span class="w"> </span>Encoding:<span class="w"> </span><span class="m">16</span>-bit<span class="w"> </span>Signed<span class="w"> </span>Integer<span class="w"> </span>PCM
|
||
|
||
|
||
Input<span class="w"> </span>File<span class="w"> </span>:<span class="w"> </span><span class="s1">'icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span>
|
||
Channels<span class="w"> </span>:<span class="w"> </span><span class="m">1</span>
|
||
Sample<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span><span class="m">16000</span>
|
||
Precision<span class="w"> </span>:<span class="w"> </span><span class="m">16</span>-bit
|
||
Duration<span class="w"> </span>:<span class="w"> </span><span class="m">00</span>:00:04.83<span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">77200</span><span class="w"> </span>samples<span class="w"> </span>~<span class="w"> </span><span class="m">361</span>.875<span class="w"> </span>CDDA<span class="w"> </span>sectors
|
||
File<span class="w"> </span>Size<span class="w"> </span>:<span class="w"> </span>154k
|
||
Bit<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span>256k
|
||
Sample<span class="w"> </span>Encoding:<span class="w"> </span><span class="m">16</span>-bit<span class="w"> </span>Signed<span class="w"> </span>Integer<span class="w"> </span>PCM
|
||
|
||
Total<span class="w"> </span>Duration<span class="w"> </span>of<span class="w"> </span><span class="m">3</span><span class="w"> </span>files:<span class="w"> </span><span class="m">00</span>:00:28.16
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="usage">
|
||
<h3>Usage<a class="headerlink" href="#usage" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/librispeech/ASR
|
||
$ ./conformer_ctc/pretrained.py --help
|
||
</pre></div>
|
||
</div>
|
||
<p>The above command displays the help information.</p>
|
||
<p>The script supports four decoding methods:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><p>CTC decoding</p></li>
|
||
<li><p>HLG decoding</p></li>
|
||
<li><p>HLG + n-gram LM rescoring</p></li>
|
||
<li><p>HLG + n-gram LM rescoring + attention decoder rescoring</p></li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<section id="ctc-decoding">
|
||
<h4>CTC decoding<a class="headerlink" href="#ctc-decoding" title="Permalink to this heading"></a></h4>
|
||
<p>CTC decoding uses the best path of the decoding lattice as the decoding result
|
||
without any LM or lexicon.</p>
|
||
<p>The command to run CTC decoding is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--bpe-model<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/bpe.model<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>ctc-decoding<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num-classes<span class="w"> </span><span class="m">500</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is given below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">29</span><span class="p">,</span><span class="mi">554</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">260</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'bpe_model'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/bpe.model'</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'ctc-decoding'</span><span class="p">,</span> <span class="s1">'G'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">1.3</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">1.2</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">500</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'sound_files'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span 
class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">],</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Tue Oct 26 22:12:54 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+missing.version.file'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'bpe-500'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'8d93169-dirty'</span><span class="p">,</span> <span class="s1">'icefall-git-date'</span><span class="p">:</span> <span class="s1">'Wed Nov 10 11:52:44 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span 
class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/icefall-fix'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/k2-bpe-500/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/lhotse-bpe-500/lhotse/__init__.py'</span><span class="p">}}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">29</span><span class="p">,</span><span class="mi">554</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">266</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">29</span><span class="p">,</span><span class="mi">554</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">268</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">35</span><span class="p">,</span><span class="mi">600</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">285</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">35</span><span class="p">,</span><span class="mi">601</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">295</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">35</span><span class="p">,</span><span class="mi">758</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">301</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">36</span><span class="p">,</span><span class="mi">025</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">319</span><span class="p">]</span> <span class="n">Use</span> <span class="n">CTC</span> <span class="n">decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">36</span><span class="p">,</span><span class="mi">204</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">425</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROFFELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">12</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">36</span><span class="p">,</span><span class="mi">204</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">427</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding">
|
||
<h4>HLG decoding<a class="headerlink" href="#hlg-decoding" title="Permalink to this heading"></a></h4>
|
||
<p>HLG decoding uses the best path of the decoding lattice as the decoding result.</p>
|
||
<p>The command to run HLG decoding is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>1best<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num-classes<span class="w"> </span><span class="m">500</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is given below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">03</span><span class="p">,</span><span class="mi">723</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">260</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt'</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt'</span><span class="p">,</span> <span class="s1">'bpe_model'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'1best'</span><span class="p">,</span> <span class="s1">'G'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">1.3</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">1.2</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">500</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'sound_files'</span><span class="p">:</span> <span class="p">[</span><span 
class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">],</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Tue Oct 26 22:12:54 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+missing.version.file'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'bpe-500'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'8d93169-dirty'</span><span class="p">,</span> <span class="s1">'icefall-git-date'</span><span class="p">:</span> <span 
class="s1">'Wed Nov 10 11:52:44 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/icefall-fix'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/k2-bpe-500/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/lhotse-bpe-500/lhotse/__init__.py'</span><span class="p">}}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">03</span><span class="p">,</span><span class="mi">723</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">266</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">03</span><span class="p">,</span><span class="mi">723</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">268</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">09</span><span class="p">,</span><span class="mi">775</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">285</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">09</span><span class="p">,</span><span class="mi">776</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">295</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">09</span><span class="p">,</span><span class="mi">881</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">301</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">09</span><span class="p">,</span><span class="mi">951</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">352</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_bpe_500</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">13</span><span class="p">,</span><span class="mi">234</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">384</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="n">decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">13</span><span class="p">,</span><span class="mi">571</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">425</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROTHELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">13</span><span class="p">,</span><span class="mi">571</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">427</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding-lm-rescoring">
|
||
<h4>HLG decoding + LM rescoring<a class="headerlink" href="#hlg-decoding-lm-rescoring" title="Permalink to this heading"></a></h4>
|
||
<p>It uses an n-gram LM to rescore the decoding lattice and the best
|
||
path of the rescored lattice is the decoding result.</p>
|
||
<p>The command to run HLG decoding + LM rescoring is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>whole-lattice-rescoring<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num-classes<span class="w"> </span><span class="m">500</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--G<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm/G_4_gram.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--ngram-lm-scale<span class="w"> </span><span class="m">1</span>.0<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>Its output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">39</span><span class="p">:</span><span class="mi">55</span><span class="p">,</span><span class="mi">857</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">260</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt'</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt'</span><span class="p">,</span> <span class="s1">'bpe_model'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'whole-lattice-rescoring'</span><span class="p">,</span> <span class="s1">'G'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm/G_4_gram.pt'</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">1.0</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">1.2</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">500</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'sound_files'</span><span 
class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">],</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Tue Oct 26 22:12:54 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+missing.version.file'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'bpe-500'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'8d93169-dirty'</span><span class="p">,</span> <span 
class="s1">'icefall-git-date'</span><span class="p">:</span> <span class="s1">'Wed Nov 10 11:52:44 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/icefall-fix'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/k2-bpe-500/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/lhotse-bpe-500/lhotse/__init__.py'</span><span class="p">}}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">39</span><span class="p">:</span><span class="mi">55</span><span class="p">,</span><span class="mi">858</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">266</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">39</span><span class="p">:</span><span class="mi">55</span><span class="p">,</span><span class="mi">858</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">268</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">01</span><span class="p">,</span><span class="mi">979</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">285</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">01</span><span class="p">,</span><span class="mi">980</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">295</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">02</span><span class="p">,</span><span class="mi">055</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">301</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">02</span><span class="p">,</span><span class="mi">117</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">352</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_bpe_500</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">05</span><span class="p">,</span><span class="mi">051</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">363</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">G</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lm</span><span class="o">/</span><span class="n">G_4_gram</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">18</span><span class="p">,</span><span class="mi">959</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">389</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="n">decoding</span> <span class="o">+</span> <span class="n">LM</span> <span class="n">rescoring</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">19</span><span class="p">,</span><span class="mi">546</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">425</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROTHELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">40</span><span class="p">:</span><span class="mi">19</span><span class="p">,</span><span class="mi">546</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">427</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding-lm-rescoring-attention-decoder-rescoring">
|
||
<h4>HLG decoding + LM rescoring + attention decoder rescoring<a class="headerlink" href="#hlg-decoding-lm-rescoring-attention-decoder-rescoring" title="Permalink to this heading"></a></h4>
|
||
<p>It uses an n-gram LM to rescore the decoding lattice, extracts
|
||
n paths from the rescored lattice, rescores the extracted paths with
|
||
an attention decoder. The path with the highest score is the decoding result.</p>
|
||
<p>The command to run HLG decoding + LM rescoring + attention decoder rescoring is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>attention-decoder<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num-classes<span class="w"> </span><span class="m">500</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--G<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm/G_4_gram.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--ngram-lm-scale<span class="w"> </span><span class="m">2</span>.0<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--attention-decoder-scale<span class="w"> </span><span class="m">2</span>.0<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nbest-scale<span class="w"> </span><span class="m">0</span>.5<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num-paths<span class="w"> </span><span class="m">100</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--sos-id<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--eos-id<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">45</span><span class="p">,</span><span class="mi">598</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">260</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt'</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt'</span><span class="p">,</span> <span class="s1">'bpe_model'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'attention-decoder'</span><span class="p">,</span> <span class="s1">'G'</span><span class="p">:</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm/G_4_gram.pt'</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">2.0</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">2.0</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">500</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'sound_files'</span><span 
class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">],</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Tue Oct 26 22:12:54 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+missing.version.file'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'bpe-500'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'8d93169-dirty'</span><span class="p">,</span> <span 
class="s1">'icefall-git-date'</span><span class="p">:</span> <span class="s1">'Wed Nov 10 11:52:44 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/icefall-fix'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/k2-bpe-500/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-fj/fangjun/open-source-2/lhotse-bpe-500/lhotse/__init__.py'</span><span class="p">}}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">45</span><span class="p">,</span><span class="mi">599</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">266</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">45</span><span class="p">,</span><span class="mi">599</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">268</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">833</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">285</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">834</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">295</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav'</span><span class="p">,</span> <span class="s1">'./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">915</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">301</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">52</span><span class="p">,</span><span class="mi">076</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">352</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_bpe_500</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">55</span><span class="p">,</span><span class="mi">110</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">363</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">G</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lm</span><span class="o">/</span><span class="n">G_4_gram</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">44</span><span class="p">:</span><span class="mi">09</span><span class="p">,</span><span class="mi">329</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">397</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="o">+</span> <span class="n">LM</span> <span class="n">rescoring</span> <span class="o">+</span> <span class="n">attention</span> <span class="n">decoder</span> <span class="n">rescoring</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">44</span><span class="p">:</span><span class="mi">10</span><span class="p">,</span><span class="mi">192</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">425</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROTHELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">44</span><span class="p">:</span><span class="mi">10</span><span class="p">,</span><span class="mi">192</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">427</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="compute-wer-with-the-pre-trained-model">
|
||
<h3>Compute WER with the pre-trained model<a class="headerlink" href="#compute-wer-with-the-pre-trained-model" title="Permalink to this heading"></a></h3>
|
||
<p>To check the WER of the pre-trained model on the test datasets, run:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/librispeech/ASR
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/
|
||
$<span class="w"> </span>ln<span class="w"> </span>-s<span class="w"> </span>pretrained.pt<span class="w"> </span>epoch-999.pt
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>../..
|
||
$<span class="w"> </span>./conformer_ctc/decode.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--exp-dir<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--lang-dir<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--lm-dir<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--epoch<span class="w"> </span><span class="m">999</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--avg<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--concatenate-cuts<span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--bucketing-sampler<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--max-duration<span class="w"> </span><span class="m">30</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num-paths<span class="w"> </span><span class="m">1000</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>attention-decoder<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nbest-scale<span class="w"> </span><span class="m">0</span>.5
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
<section id="colab-notebook">
|
||
<h2>Colab notebook<a class="headerlink" href="#colab-notebook" title="Permalink to this heading"></a></h2>
|
||
<p>We provide a Colab notebook for this recipe that shows how to use a pre-trained model.</p>
|
||
<p><a class="reference external" href="https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing"><img alt="librispeech asr conformer ctc colab notebook" src="https://colab.research.google.com/assets/colab-badge.svg" /></a></p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>Due to the limited memory provided by Colab, you have to upgrade to Colab Pro to
|
||
run <code class="docutils literal notranslate"><span class="pre">HLG</span> <span class="pre">decoding</span> <span class="pre">+</span> <span class="pre">LM</span> <span class="pre">rescoring</span></code> and
|
||
<code class="docutils literal notranslate"><span class="pre">HLG</span> <span class="pre">decoding</span> <span class="pre">+</span> <span class="pre">LM</span> <span class="pre">rescoring</span> <span class="pre">+</span> <span class="pre">attention</span> <span class="pre">decoder</span> <span class="pre">rescoring</span></code>.
|
||
Otherwise, you can only run <code class="docutils literal notranslate"><span class="pre">HLG</span> <span class="pre">decoding</span></code> with Colab.</p>
|
||
</div>
|
||
<p><strong>Congratulations!</strong> You have finished the LibriSpeech ASR recipe with
|
||
conformer CTC models in <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||
<p>If you want to deploy your trained model in C++, please read the following section.</p>
|
||
</section>
|
||
<section id="deployment-with-c">
|
||
<h2>Deployment with C++<a class="headerlink" href="#deployment-with-c" title="Permalink to this heading"></a></h2>
|
||
<p>This section describes how to deploy the pre-trained model in C++, without
|
||
Python dependencies.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>At present, it does NOT support streaming decoding.</p>
|
||
</div>
|
||
<p>First, let us compile k2 from source:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span><span class="nv">$HOME</span>
|
||
$<span class="w"> </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/k2-fsa/k2
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>k2
|
||
$<span class="w"> </span>git<span class="w"> </span>checkout<span class="w"> </span>v2.0-pre
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>You have to switch to the branch <code class="docutils literal notranslate"><span class="pre">v2.0-pre</span></code>!</p>
|
||
</div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>mkdir<span class="w"> </span>build-release
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>build-release
|
||
$<span class="w"> </span>cmake<span class="w"> </span>-DCMAKE_BUILD_TYPE<span class="o">=</span>Release<span class="w"> </span>..
|
||
$<span class="w"> </span>make<span class="w"> </span>-j<span class="w"> </span>ctc_decode<span class="w"> </span>hlg_decode<span class="w"> </span>ngram_lm_rescore<span class="w"> </span>attention_rescore
|
||
|
||
<span class="c1"># You will find four binaries in `./bin`, i.e.,</span>
|
||
<span class="c1"># ./bin/ctc_decode, ./bin/hlg_decode,</span>
|
||
<span class="c1"># ./bin/ngram_lm_rescore, and ./bin/attention_rescore</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Now you are ready to go!</p>
|
||
<p>Assume you have run:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>k2/build-release
|
||
$<span class="w"> </span>ln<span class="w"> </span>-s<span class="w"> </span>/path/to/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09<span class="w"> </span>./
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>To view the usage of <code class="docutils literal notranslate"><span class="pre">./bin/ctc_decode</span></code>, run:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ ./bin/ctc_decode
|
||
</pre></div>
|
||
</div>
|
||
<p>It will show you the following message:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>Please<span class="w"> </span>provide<span class="w"> </span>--nn_model
|
||
|
||
This<span class="w"> </span>file<span class="w"> </span>implements<span class="w"> </span>decoding<span class="w"> </span>with<span class="w"> </span>a<span class="w"> </span>CTC<span class="w"> </span>topology,<span class="w"> </span>without<span class="w"> </span>any
|
||
kinds<span class="w"> </span>of<span class="w"> </span>LM<span class="w"> </span>or<span class="w"> </span>lexicons.
|
||
|
||
Usage:
|
||
<span class="w"> </span>./bin/ctc_decode<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>torch<span class="w"> </span>scripted<span class="w"> </span>pt<span class="w"> </span>file&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--bpe_model<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>pre-trained<span class="w"> </span>BPE<span class="w"> </span>model&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>foo.wav&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>bar.wav&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>&lt;more<span class="w"> </span>waves<span class="w"> </span><span class="k">if</span><span class="w"> </span>any&gt;
|
||
|
||
To<span class="w"> </span>see<span class="w"> </span>all<span class="w"> </span>possible<span class="w"> </span>options,<span class="w"> </span>use
|
||
<span class="w"> </span>./bin/ctc_decode<span class="w"> </span>--help
|
||
|
||
Caution:
|
||
<span class="w"> </span>-<span class="w"> </span>Only<span class="w"> </span>sound<span class="w"> </span>files<span class="w"> </span><span class="o">(</span>*.wav<span class="o">)</span><span class="w"> </span>with<span class="w"> </span>single<span class="w"> </span>channel<span class="w"> </span>are<span class="w"> </span>supported.
|
||
<span class="w"> </span>-<span class="w"> </span>It<span class="w"> </span>assumes<span class="w"> </span>the<span class="w"> </span>model<span class="w"> </span>is<span class="w"> </span>conformer_ctc/transformer.py<span class="w"> </span>from<span class="w"> </span>icefall.
|
||
<span class="w"> </span>If<span class="w"> </span>you<span class="w"> </span>use<span class="w"> </span>a<span class="w"> </span>different<span class="w"> </span>model,<span class="w"> </span>you<span class="w"> </span>have<span class="w"> </span>to<span class="w"> </span>change<span class="w"> </span>the<span class="w"> </span>code
|
||
<span class="w"> </span>related<span class="w"> </span>to<span class="w"> </span><span class="sb">`</span>model.forward<span class="sb">`</span><span class="w"> </span><span class="k">in</span><span class="w"> </span>this<span class="w"> </span>file.
|
||
</pre></div>
|
||
</div>
|
||
<section id="id2">
|
||
<h3>CTC decoding<a class="headerlink" href="#id2" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./bin/ctc_decode<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/cpu_jit.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--bpe_model<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/bpe.model<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>Its output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">57</span><span class="p">:</span><span class="mf">55.316</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">105</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Use</span> <span class="n">GPU</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">57</span><span class="p">:</span><span class="mf">55.316</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">109</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">57</span><span class="p">:</span><span class="mf">55.316</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">118</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">wave</span> <span class="n">files</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">01.221</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">125</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Build</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">01.222</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">136</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">features</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">01.228</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">144</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">neural</span> <span class="n">network</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">02.19</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">159</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">nnet_output</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">02.543</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">174</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Build</span> <span class="n">CTC</span> <span class="n">topo</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">02.547</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">177</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">58</span><span class="p">:</span><span class="mf">02.708</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ctc_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">207</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span>
|
||
<span class="n">Decoding</span> <span class="n">result</span><span class="p">:</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROFFELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="id3">
|
||
<h3>HLG decoding<a class="headerlink" href="#id3" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./bin/hlg_decode<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/cpu_jit.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--hlg<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--word_table<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">04.729</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">111</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Use</span> <span class="n">GPU</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">04.729</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">115</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">04.729</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">124</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">wave</span> <span class="n">files</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">10.702</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">131</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Build</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">10.703</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">142</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">features</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">10.707</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">150</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">neural</span> <span class="n">network</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">11.545</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">165</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">nnet_output</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">12.72</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">180</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_bpe_500</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">12.994</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">185</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">13</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">13.268</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">216</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span>
|
||
<span class="n">Decoding</span> <span class="n">result</span><span class="p">:</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROTHELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding-n-gram-lm-rescoring">
|
||
<h3>HLG decoding + n-gram LM rescoring<a class="headerlink" href="#hlg-decoding-n-gram-lm-rescoring" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./bin/ngram_lm_rescore<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/cpu_jit.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--hlg<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--g<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm/G_4_gram.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--ngram_lm_scale<span class="w"> </span><span class="m">1</span>.0<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--word_table<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">00</span><span class="p">:</span><span class="mf">55.279</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">122</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Use</span> <span class="n">GPU</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">00</span><span class="p">:</span><span class="mf">55.280</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">126</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">00</span><span class="p">:</span><span class="mf">55.280</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">135</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">wave</span> <span class="n">files</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">01.214</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">142</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Build</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">01.215</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">153</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">features</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">01.219</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">161</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">neural</span> <span class="n">network</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">01.945</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">176</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">nnet_output</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">02.475</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">191</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_bpe_500</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">03.398</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">199</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">03.515</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">205</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">n</span><span class="o">-</span><span class="n">gram</span> <span class="n">LM</span><span class="p">:</span> <span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lm</span><span class="o">/</span><span class="n">G_4_gram</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">07.432</span> <span class="p">[</span><span class="n">W</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="n">csrc</span><span class="o">/</span><span class="n">deserialization</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">441</span><span class="p">:</span><span class="n">k2</span><span class="p">::</span><span class="n">FsaClass</span> <span class="n">k2</span><span class="p">::</span><span class="n">LoadFsa</span><span class="p">(</span><span class="n">const</span> <span class="n">string</span><span class="o">&</span><span class="p">,</span> <span class="n">c10</span><span class="p">::</span><span class="n">optional</span><span class="o"><</span><span class="n">c10</span><span class="p">::</span><span class="n">Device</span><span class="o">></span><span class="p">)</span>
|
||
<span class="n">Ignore</span> <span class="n">non</span> <span class="n">tensor</span> <span class="n">attribute</span><span class="p">:</span> <span class="s1">'dummy'</span> <span class="n">of</span> <span class="nb">type</span><span class="p">:</span> <span class="n">Int</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">07.589</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">214</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Rescore</span> <span class="k">with</span> <span class="n">an</span> <span class="n">n</span><span class="o">-</span><span class="n">gram</span> <span class="n">LM</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">08.68</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">ngram_lm_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">242</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span>
|
||
<span class="n">Decoding</span> <span class="n">result</span><span class="p">:</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROTHELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring">
|
||
<h3>HLG decoding + n-gram LM rescoring + attention decoder rescoring<a class="headerlink" href="#hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./bin/attention_rescore<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/exp/cpu_jit.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--hlg<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--g<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lm/G_4_gram.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--ngram_lm_scale<span class="w"> </span><span class="m">2</span>.0<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--attention_scale<span class="w"> </span><span class="m">2</span>.0<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--num_paths<span class="w"> </span><span class="m">100</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nbest_scale<span class="w"> </span><span class="m">0</span>.5<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--word_table<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/data/lang_bpe_500/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--sos_id<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--eos_id<span class="w"> </span><span class="m">1</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1089-134686-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0001.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09/test_wavs/1221-135766-0002.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">43.656</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">149</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Use</span> <span class="n">GPU</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">43.656</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">153</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">43.656</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">162</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">wave</span> <span class="n">files</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">49.216</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">169</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Build</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">49.217</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">180</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">features</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">49.222</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">188</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">neural</span> <span class="n">network</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">49.984</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">203</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">nnet_output</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">50.624</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">220</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_bpe_500</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">51.519</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">228</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">51.632</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">234</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">n</span><span class="o">-</span><span class="n">gram</span> <span class="n">LM</span><span class="p">:</span> <span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lm</span><span class="o">/</span><span class="n">G_4_gram</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">55.537</span> <span class="p">[</span><span class="n">W</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="n">csrc</span><span class="o">/</span><span class="n">deserialization</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">441</span><span class="p">:</span><span class="n">k2</span><span class="p">::</span><span class="n">FsaClass</span> <span class="n">k2</span><span class="p">::</span><span class="n">LoadFsa</span><span class="p">(</span><span class="n">const</span> <span class="n">string</span><span class="o">&</span><span class="p">,</span> <span class="n">c10</span><span class="p">::</span><span class="n">optional</span><span class="o"><</span><span class="n">c10</span><span class="p">::</span><span class="n">Device</span><span class="o">></span><span class="p">)</span> <span class="n">Ignore</span> <span class="n">non</span> <span class="n">tensor</span> <span class="n">attribute</span><span class="p">:</span> <span class="s1">'dummy'</span> <span class="n">of</span> <span class="nb">type</span><span class="p">:</span> <span class="n">Int</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">55.645</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">243</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Rescore</span> <span class="k">with</span> <span class="n">an</span> <span class="n">n</span><span class="o">-</span><span class="n">gram</span> <span class="n">LM</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">55.970</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">246</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Sample</span> <span class="mi">100</span> <span class="n">paths</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">56.215</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">293</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Run</span> <span class="n">attention</span> <span class="n">decoder</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">57.35</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">303</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Rescoring</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">10</span> <span class="mi">14</span><span class="p">:</span><span class="mi">02</span><span class="p">:</span><span class="mf">57.179</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">attention_rescore</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">369</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span>
|
||
<span class="n">Decoding</span> <span class="n">result</span><span class="p">:</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1089</span><span class="o">-</span><span class="mi">134686</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">AFTER</span> <span class="n">EARLY</span> <span class="n">NIGHTFALL</span> <span class="n">THE</span> <span class="n">YELLOW</span> <span class="n">LAMPS</span> <span class="n">WOULD</span> <span class="n">LIGHT</span> <span class="n">UP</span> <span class="n">HERE</span> <span class="n">AND</span> <span class="n">THERE</span> <span class="n">THE</span> <span class="n">SQUALID</span> <span class="n">QUARTER</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">BROTHELS</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0001.</span><span class="n">wav</span>
|
||
<span class="n">GOD</span> <span class="n">AS</span> <span class="n">A</span> <span class="n">DIRECT</span> <span class="n">CONSEQUENCE</span> <span class="n">OF</span> <span class="n">THE</span> <span class="n">SIN</span> <span class="n">WHICH</span> <span class="n">MAN</span> <span class="n">THUS</span> <span class="n">PUNISHED</span> <span class="n">HAD</span> <span class="n">GIVEN</span> <span class="n">HER</span> <span class="n">A</span> <span class="n">LOVELY</span> <span class="n">CHILD</span> <span class="n">WHOSE</span> <span class="n">PLACE</span> <span class="n">WAS</span> <span class="n">ON</span> <span class="n">THAT</span> <span class="n">SAME</span> <span class="n">DISHONORED</span> <span class="n">BOSOM</span> <span class="n">TO</span> <span class="n">CONNECT</span> <span class="n">HER</span> <span class="n">PARENT</span> <span class="n">FOREVER</span> <span class="n">WITH</span> <span class="n">THE</span> <span class="n">RACE</span> <span class="n">AND</span> <span class="n">DESCENT</span> <span class="n">OF</span> <span class="n">MORTALS</span> <span class="n">AND</span> <span class="n">TO</span> <span class="n">BE</span> <span class="n">FINALLY</span> <span class="n">A</span> <span class="n">BLESSED</span> <span class="n">SOUL</span> <span class="n">IN</span> <span class="n">HEAVEN</span>
|
||
|
||
<span class="o">./</span><span class="n">icefall</span><span class="o">-</span><span class="n">asr</span><span class="o">-</span><span class="n">librispeech</span><span class="o">-</span><span class="n">conformer</span><span class="o">-</span><span class="n">ctc</span><span class="o">-</span><span class="n">jit</span><span class="o">-</span><span class="n">bpe</span><span class="o">-</span><span class="mi">500</span><span class="o">-</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">09</span><span class="o">/</span><span class="n">test_wavs</span><span class="o">/</span><span class="mi">1221</span><span class="o">-</span><span class="mi">135766</span><span class="o">-</span><span class="mf">0002.</span><span class="n">wav</span>
|
||
<span class="n">YET</span> <span class="n">THESE</span> <span class="n">THOUGHTS</span> <span class="n">AFFECTED</span> <span class="n">HESTER</span> <span class="n">PRYNNE</span> <span class="n">LESS</span> <span class="n">WITH</span> <span class="n">HOPE</span> <span class="n">THAN</span> <span class="n">APPREHENSION</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>There is a Colab notebook showing you how to run a torch scripted model in C++.
Please see <a class="reference external" href="https://colab.research.google.com/drive/1BIGLWzS36isskMXHKcqC9ysN6pspYXs_?usp=sharing"><img alt="librispeech asr conformer ctc torch script colab notebook" src="https://colab.research.google.com/assets/colab-badge.svg" /></a></p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
    <!-- Page footer: previous/next page links, copyright notice, and build credits. -->
    <a href="tdnn_lstm_ctc.html" class="btn btn-neutral float-left" title="TDNN-LSTM-CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
    <a href="pruned_transducer_stateless.html" class="btn btn-neutral float-right" title="Pruned transducer statelessX" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
  </div>

  <hr/>

  <div role="contentinfo">
    <p>© Copyright 2021, icefall development team.</p>
  </div>

  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
  <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
  provided by <a href="https://readthedocs.org">Read the Docs</a>.

</footer>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<script>
  // jQuery(fn) defers the callback until the document is ready; the callback
  // then turns on the Read the Docs theme's navigation behaviour.
  jQuery(function () {
    SphinxRtdTheme.Navigation.enable(true);
  });
</script>
|
||
|
||
</body>
|
||
</html> |