mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
823 lines
103 KiB
HTML
823 lines
103 KiB
HTML
<!DOCTYPE html>
|
||
<html class="writer-html5" lang="en" >
|
||
<head>
|
||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||
<title>Conformer CTC — icefall 0.1 documentation</title>
|
||
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
|
||
<link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
|
||
<!--[if lt IE 9]>
|
||
<script src="../../../_static/js/html5shiv.min.js"></script>
|
||
<![endif]-->
|
||
|
||
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
|
||
<script src="../../../_static/jquery.js"></script>
|
||
<script src="../../../_static/underscore.js"></script>
|
||
<script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||
<script src="../../../_static/doctools.js"></script>
|
||
<script src="../../../_static/sphinx_highlight.js"></script>
|
||
<script src="../../../_static/js/theme.js"></script>
|
||
<link rel="index" title="Index" href="../../../genindex.html" />
|
||
<link rel="search" title="Search" href="../../../search.html" />
|
||
<link rel="next" title="Stateless Transducer" href="stateless_transducer.html" />
|
||
<link rel="prev" title="TDNN-LSTM CTC" href="tdnn_lstm_ctc.html" />
|
||
</head>
|
||
|
||
<body class="wy-body-for-nav">
|
||
<div class="wy-grid-for-nav">
|
||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||
<div class="wy-side-scroll">
|
||
<div class="wy-side-nav-search" >
|
||
<a href="../../../index.html" class="icon icon-home"> icefall
|
||
</a>
|
||
<div role="search">
|
||
<form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
|
||
<input type="text" name="q" placeholder="Search docs" />
|
||
<input type="hidden" name="check_keywords" value="yes" />
|
||
<input type="hidden" name="area" value="default" />
|
||
</form>
|
||
</div>
|
||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../installation/index.html">Installation</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../faqs.html">Frequently Asked Questions (FAQs)</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../model-export/index.html">Model export</a></li>
|
||
</ul>
|
||
<ul class="current">
|
||
<li class="toctree-l1 current"><a class="reference internal" href="../../index.html">Recipes</a><ul class="current">
|
||
<li class="toctree-l2 current"><a class="reference internal" href="../index.html">Non Streaming ASR</a><ul class="current">
|
||
<li class="toctree-l3 current"><a class="reference internal" href="index.html">aishell</a><ul class="current">
|
||
<li class="toctree-l4"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM CTC</a></li>
|
||
<li class="toctree-l4 current"><a class="current reference internal" href="#">Conformer CTC</a></li>
|
||
<li class="toctree-l4"><a class="reference internal" href="stateless_transducer.html">Stateless Transducer</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../librispeech/index.html">LibriSpeech</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
|
||
<li class="toctree-l3"><a class="reference internal" href="../yesno/index.html">YesNo</a></li>
|
||
</ul>
|
||
</li>
|
||
<li class="toctree-l2"><a class="reference internal" href="../../Streaming-ASR/index.html">Streaming ASR</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
<ul>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../contributing/index.html">Contributing</a></li>
|
||
<li class="toctree-l1"><a class="reference internal" href="../../../huggingface/index.html">Huggingface</a></li>
|
||
</ul>
|
||
|
||
</div>
|
||
</div>
|
||
</nav>
|
||
|
||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||
<a href="../../../index.html">icefall</a>
|
||
</nav>
|
||
|
||
<div class="wy-nav-content">
|
||
<div class="rst-content">
|
||
<div role="navigation" aria-label="Page navigation">
|
||
<ul class="wy-breadcrumbs">
|
||
<li><a href="../../../index.html" class="icon icon-home"></a></li>
|
||
<li class="breadcrumb-item"><a href="../../index.html">Recipes</a></li>
|
||
<li class="breadcrumb-item"><a href="../index.html">Non Streaming ASR</a></li>
|
||
<li class="breadcrumb-item"><a href="index.html">aishell</a></li>
|
||
<li class="breadcrumb-item active">Conformer CTC</li>
|
||
<li class="wy-breadcrumbs-aside">
|
||
<a href="https://github.com/k2-fsa/icefall/blob/master/docs/source/recipes/Non-streaming-ASR/aishell/conformer_ctc.rst" class="fa fa-github"> Edit on GitHub</a>
|
||
</li>
|
||
</ul>
|
||
<hr/>
|
||
</div>
|
||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||
<div itemprop="articleBody">
|
||
|
||
<section id="conformer-ctc">
|
||
<h1>Conformer CTC<a class="headerlink" href="#conformer-ctc" title="Permalink to this heading"></a></h1>
|
||
<p>This tutorial shows you how to run a conformer ctc model
|
||
with the <a class="reference external" href="https://www.openslr.org/33">Aishell</a> dataset.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>We assume you have read the page <a class="reference internal" href="../../../installation/index.html#install-icefall"><span class="std std-ref">Installation</span></a> and have set up
|
||
the environment for <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||
</div>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>We recommend that you use a GPU or several GPUs to run this recipe.</p>
|
||
</div>
|
||
<p>In this tutorial, you will learn:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><ol class="arabic simple">
|
||
<li><p>How to prepare data for training and decoding</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="2">
|
||
<li><p>How to start the training, either with a single GPU or multiple GPUs</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="3">
|
||
<li><p>How to do decoding after training, with ctc-decoding, 1best and attention decoder rescoring</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="arabic simple" start="4">
|
||
<li><p>How to use a pre-trained model, provided by us</p></li>
|
||
</ol>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<section id="data-preparation">
|
||
<h2>Data preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h2>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./prepare.sh
|
||
</pre></div>
|
||
</div>
|
||
<p>The script <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> handles the data preparation for you, <strong>automagically</strong>.
|
||
All you need to do is to run it.</p>
|
||
<p>The data preparation contains several stages. You can use the following two
|
||
options:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--stage</span></code></p></li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--stop-stage</span></code></p></li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>to control which stage(s) should be run. By default, all stages are executed.</p>
|
||
<p>For example,</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./prepare.sh<span class="w"> </span>--stage<span class="w"> </span><span class="m">0</span><span class="w"> </span>--stop-stage<span class="w"> </span><span class="m">0</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>means to run only stage 0.</p>
|
||
<p>To run stage 2 to stage 5, use:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./prepare.sh<span class="w"> </span>--stage<span class="w"> </span><span class="m">2</span><span class="w"> </span>--stop-stage<span class="w"> </span><span class="m">5</span>
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>If you have pre-downloaded the <a class="reference external" href="https://www.openslr.org/33">Aishell</a>
|
||
dataset and the <a class="reference external" href="http://www.openslr.org/17/">musan</a> dataset, say,
|
||
they are saved in <code class="docutils literal notranslate"><span class="pre">/tmp/aishell</span></code> and <code class="docutils literal notranslate"><span class="pre">/tmp/musan</span></code>, you can modify
|
||
the <code class="docutils literal notranslate"><span class="pre">dl_dir</span></code> variable in <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> to point to <code class="docutils literal notranslate"><span class="pre">/tmp</span></code> so that
|
||
<code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> won’t re-download them.</p>
|
||
</div>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>A 3-gram language model will be downloaded from huggingface. We assume you have
|
||
installed and initialized <code class="docutils literal notranslate"><span class="pre">git-lfs</span></code>. If not, you can install <code class="docutils literal notranslate"><span class="pre">git-lfs</span></code> by running:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>sudo<span class="w"> </span>apt-get<span class="w"> </span>install<span class="w"> </span>git-lfs
|
||
$<span class="w"> </span>git-lfs<span class="w"> </span>install
|
||
</pre></div>
|
||
</div>
|
||
<p>If you don’t have the <code class="docutils literal notranslate"><span class="pre">sudo</span></code> permission, you could download the
|
||
<a class="reference external" href="https://github.com/git-lfs/git-lfs/releases">git-lfs binary</a> here, then add it to your <code class="docutils literal notranslate"><span class="pre">PATH</span></code>.</p>
|
||
</div>
|
||
<div class="admonition note">
|
||
<p class="admonition-title">Note</p>
|
||
<p>All files generated by <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code>, e.g., features, lexicon, etc.,
|
||
are saved in the <code class="docutils literal notranslate"><span class="pre">./data</span></code> directory.</p>
|
||
</div>
|
||
</section>
|
||
<section id="training">
|
||
<h2>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h2>
|
||
<section id="configurable-options">
|
||
<h3>Configurable options<a class="headerlink" href="#configurable-options" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>shows you the training options that can be passed from the commandline.
|
||
The following options are used quite often:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--exp-dir</span></code></p>
|
||
<p>The experiment folder to save logs and model checkpoints,
|
||
default <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/exp</span></code>.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--num-epochs</span></code></p>
|
||
<p>It is the number of epochs to train. For instance,
|
||
<code class="docutils literal notranslate"><span class="pre">./conformer_ctc/train.py</span> <span class="pre">--num-epochs</span> <span class="pre">30</span></code> trains for 30 epochs
|
||
and generates <code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …, <code class="docutils literal notranslate"><span class="pre">epoch-29.pt</span></code>
|
||
in the folder set by <code class="docutils literal notranslate"><span class="pre">--exp-dir</span></code>.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--start-epoch</span></code></p>
|
||
<p>It’s used to resume training.
|
||
<code class="docutils literal notranslate"><span class="pre">./conformer_ctc/train.py</span> <span class="pre">--start-epoch</span> <span class="pre">10</span></code> loads the
|
||
checkpoint <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/exp/epoch-9.pt</span></code> and starts
|
||
training from epoch 10, based on the state from epoch 9.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--world-size</span></code></p>
|
||
<p>It is used for multi-GPU single-machine DDP training.</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><ol class="loweralpha simple">
|
||
<li><p>If it is 1, then no DDP training is used.</p></li>
|
||
</ol>
|
||
</li>
|
||
<li><ol class="loweralpha simple" start="2">
|
||
<li><p>If it is 2, then GPU 0 and GPU 1 are used for DDP training.</p></li>
|
||
</ol>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>The following shows some use cases with it.</p>
|
||
<blockquote>
|
||
<div><p><strong>Use case 1</strong>: You have 4 GPUs, but you only want to use GPU 0 and
|
||
GPU 2 for training. You can do the following:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0,2"</span>
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">2</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p><strong>Use case 2</strong>: You have 4 GPUs and you want to use all of them
|
||
for training. You can do the following:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">4</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p><strong>Use case 3</strong>: You have 4 GPUs but you only want to use GPU 3
|
||
for training. You can do the following:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"3"</span>
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">1</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</div></blockquote>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>Only multi-GPU single-machine DDP training is implemented at present.
|
||
Multi-GPU multi-machine DDP training will be added later.</p>
|
||
</div>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
|
||
<p>It specifies the number of seconds over all utterances in a
|
||
batch, before <strong>padding</strong>.
|
||
If you encounter CUDA OOM, please reduce it. For instance, if
|
||
you are using a V100 NVIDIA GPU, we recommend setting it to <code class="docutils literal notranslate"><span class="pre">200</span></code>.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>Due to padding, the number of seconds of all utterances in a
|
||
batch will usually be larger than <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code>.</p>
|
||
<p>A larger value for <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code> may cause OOM during training,
|
||
while a smaller value may increase the training time. You have to
|
||
tune it.</p>
|
||
</div>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="pre-configured-options">
|
||
<h3>Pre-configured options<a class="headerlink" href="#pre-configured-options" title="Permalink to this heading"></a></h3>
|
||
<p>There are some training options, e.g., weight decay,
|
||
number of warmup steps, etc,
|
||
that are not passed from the commandline.
|
||
They are pre-configured by the function <code class="docutils literal notranslate"><span class="pre">get_params()</span></code> in
|
||
<a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/conformer_ctc/train.py">conformer_ctc/train.py</a>.</p>
|
||
<p>You don’t need to change these pre-configured parameters. If you really need to change
|
||
them, please modify <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/train.py</span></code> directly.</p>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>The training set is perturbed by speed with two factors: 0.9 and 1.1.
|
||
Each epoch actually processes <code class="docutils literal notranslate"><span class="pre">3x150</span> <span class="pre">==</span> <span class="pre">450</span></code> hours of data.</p>
|
||
</div>
|
||
</section>
|
||
<section id="training-logs">
|
||
<h3>Training logs<a class="headerlink" href="#training-logs" title="Permalink to this heading"></a></h3>
|
||
<p>Training logs and checkpoints are saved in the folder set by <code class="docutils literal notranslate"><span class="pre">--exp-dir</span></code>
|
||
(default <code class="docutils literal notranslate"><span class="pre">conformer_ctc/exp</span></code>). You will find the following files in that directory:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …</p>
|
||
<p>These are checkpoint files, containing model <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> and optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>.
|
||
To resume training from some checkpoint, say <code class="docutils literal notranslate"><span class="pre">epoch-10.pt</span></code>, you can use:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--start-epoch<span class="w"> </span><span class="m">11</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">tensorboard/</span></code></p>
|
||
<p>This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||
rate, etc, are recorded in these logs. You can visualize them by:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>conformer_ctc/exp/tensorboard
|
||
$<span class="w"> </span>tensorboard<span class="w"> </span>dev<span class="w"> </span>upload<span class="w"> </span>--logdir<span class="w"> </span>.<span class="w"> </span>--name<span class="w"> </span><span class="s2">"Aishell conformer ctc training with icefall"</span><span class="w"> </span>--description<span class="w"> </span><span class="s2">"Training with new LabelSmoothing loss, see https://github.com/k2-fsa/icefall/pull/109"</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>It will print something like below:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">TensorFlow</span> <span class="n">installation</span> <span class="ow">not</span> <span class="n">found</span> <span class="o">-</span> <span class="n">running</span> <span class="k">with</span> <span class="n">reduced</span> <span class="n">feature</span> <span class="nb">set</span><span class="o">.</span>
|
||
<span class="n">Upload</span> <span class="n">started</span> <span class="ow">and</span> <span class="n">will</span> <span class="k">continue</span> <span class="n">reading</span> <span class="nb">any</span> <span class="n">new</span> <span class="n">data</span> <span class="k">as</span> <span class="n">it</span><span class="s1">'s added to the logdir.</span>
|
||
|
||
<span class="n">To</span> <span class="n">stop</span> <span class="n">uploading</span><span class="p">,</span> <span class="n">press</span> <span class="n">Ctrl</span><span class="o">-</span><span class="n">C</span><span class="o">.</span>
|
||
|
||
<span class="n">New</span> <span class="n">experiment</span> <span class="n">created</span><span class="o">.</span> <span class="n">View</span> <span class="n">your</span> <span class="n">TensorBoard</span> <span class="n">at</span><span class="p">:</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">tensorboard</span><span class="o">.</span><span class="n">dev</span><span class="o">/</span><span class="n">experiment</span><span class="o">/</span><span class="n">engw8KSkTZqS24zBV5dgCg</span><span class="o">/</span>
|
||
|
||
<span class="p">[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">22</span><span class="n">T11</span><span class="p">:</span><span class="mi">09</span><span class="p">:</span><span class="mi">27</span><span class="p">]</span> <span class="n">Started</span> <span class="n">scanning</span> <span class="n">logdir</span><span class="o">.</span>
|
||
<span class="p">[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">22</span><span class="n">T11</span><span class="p">:</span><span class="mi">10</span><span class="p">:</span><span class="mi">14</span><span class="p">]</span> <span class="n">Total</span> <span class="n">uploaded</span><span class="p">:</span> <span class="mi">116068</span> <span class="n">scalars</span><span class="p">,</span> <span class="mi">0</span> <span class="n">tensors</span><span class="p">,</span> <span class="mi">0</span> <span class="n">binary</span> <span class="n">objects</span>
|
||
<span class="n">Listening</span> <span class="k">for</span> <span class="n">new</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">logdir</span><span class="o">...</span>
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>Note that there is a URL in the above output. Click it and you will see
|
||
the following screenshot:</p>
|
||
<blockquote>
|
||
<div><figure class="align-center" id="id4">
|
||
<a class="reference external image-reference" href="https://tensorboard.dev/experiment/WE1DocDqRRCOSAgmGyClhg/"><img alt="TensorBoard screenshot" src="../../../_images/aishell-conformer-ctc-tensorboard-log.jpg" style="width: 600px;" /></a>
|
||
<figcaption>
|
||
<p><span class="caption-number">Fig. 2 </span><span class="caption-text">TensorBoard screenshot.</span><a class="headerlink" href="#id4" title="Permalink to this image"></a></p>
|
||
</figcaption>
|
||
</figure>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">log/log-train-xxxx</span></code></p>
|
||
<p>It is the detailed training log in text format, same as the one
|
||
you saw printed to the console during training.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="usage-examples">
|
||
<h3>Usage examples<a class="headerlink" href="#usage-examples" title="Permalink to this heading"></a></h3>
|
||
<p>The following shows typical use cases:</p>
|
||
<section id="case-1">
|
||
<h4><strong>Case 1</strong><a class="headerlink" href="#case-1" title="Permalink to this heading"></a></h4>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--max-duration<span class="w"> </span><span class="m">200</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It uses <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code> of 200 to avoid OOM.</p>
|
||
</section>
|
||
<section id="case-2">
|
||
<h4><strong>Case 2</strong><a class="headerlink" href="#case-2" title="Permalink to this heading"></a></h4>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span><span class="nb">export</span><span class="w"> </span><span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0,3"</span>
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--world-size<span class="w"> </span><span class="m">2</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It uses GPU 0 and GPU 3 for DDP training.</p>
|
||
</section>
|
||
<section id="case-3">
|
||
<h4><strong>Case 3</strong><a class="headerlink" href="#case-3" title="Permalink to this heading"></a></h4>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/train.py<span class="w"> </span>--num-epochs<span class="w"> </span><span class="m">10</span><span class="w"> </span>--start-epoch<span class="w"> </span><span class="m">3</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>It loads checkpoint <code class="docutils literal notranslate"><span class="pre">./conformer_ctc/exp/epoch-2.pt</span></code> and starts
|
||
training from epoch 3. Also, it trains for 10 epochs.</p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="decoding">
|
||
<h2>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h2>
|
||
<p>The decoding part uses checkpoints saved by the training part, so you have
|
||
to run the training part first.</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/decode.py<span class="w"> </span>--help
|
||
</pre></div>
|
||
</div>
|
||
<p>shows the options for decoding.</p>
|
||
<p>The commonly used options are:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--method</span></code></p>
|
||
<p>This specifies the decoding method.</p>
|
||
<p>The following command uses attention decoder for rescoring:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/aishell/ASR
|
||
$ ./conformer_ctc/decode.py --method attention-decoder --max-duration 30 --nbest-scale 0.5
|
||
</pre></div>
|
||
</div>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--nbest-scale</span></code></p>
|
||
<p>It is used to scale down lattice scores so that there are more unique
|
||
paths for rescoring.</p>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
|
||
<p>It has the same meaning as the one during training. A larger
|
||
value may cause OOM.</p>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
</section>
|
||
<section id="pre-trained-model">
|
||
<h2>Pre-trained Model<a class="headerlink" href="#pre-trained-model" title="Permalink to this heading"></a></h2>
|
||
<p>We have uploaded a pre-trained model to
|
||
<a class="reference external" href="https://huggingface.co/pkufool/icefall_asr_aishell_conformer_ctc">https://huggingface.co/pkufool/icefall_asr_aishell_conformer_ctc</a>.</p>
|
||
<p>We describe how to use the pre-trained model to transcribe a sound file or
|
||
multiple sound files in the following.</p>
|
||
<section id="install-kaldifeat">
|
||
<h3>Install kaldifeat<a class="headerlink" href="#install-kaldifeat" title="Permalink to this heading"></a></h3>
|
||
<p><a class="reference external" href="https://github.com/csukuangfj/kaldifeat">kaldifeat</a> is used to
|
||
extract features for a single sound file or multiple sound files
|
||
at the same time.</p>
|
||
<p>Please refer to <a class="reference external" href="https://github.com/csukuangfj/kaldifeat">https://github.com/csukuangfj/kaldifeat</a> for installation.</p>
|
||
</section>
|
||
<section id="download-the-pre-trained-model">
|
||
<h3>Download the pre-trained model<a class="headerlink" href="#download-the-pre-trained-model" title="Permalink to this heading"></a></h3>
|
||
<p>The following commands describe how to download the pre-trained model:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/aishell/ASR
|
||
$ mkdir tmp
|
||
$ cd tmp
|
||
$ git lfs install
|
||
$ git clone https://huggingface.co/pkufool/icefall_asr_aishell_conformer_ctc
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>You have to use <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">lfs</span></code> to download the pre-trained model.</p>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>In order to use this pre-trained model, your k2 version has to be v1.7 or later.</p>
|
||
</div>
|
||
<p>After downloading, you will have the following files:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>tree<span class="w"> </span>tmp
|
||
</pre></div>
|
||
</div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>tmp/
|
||
<span class="sb">`</span>--<span class="w"> </span>icefall_asr_aishell_conformer_ctc
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>README.md
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>data
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>lang_char
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>HLG.pt
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="p">|</span>--<span class="w"> </span>tokens.txt
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>words.txt
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>exp
|
||
<span class="w"> </span><span class="p">|</span><span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>pretrained.pt
|
||
<span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>test_waves
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>BAC009S0764W0121.wav
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>BAC009S0764W0122.wav
|
||
<span class="w"> </span><span class="p">|</span>--<span class="w"> </span>BAC009S0764W0123.wav
|
||
<span class="w"> </span><span class="sb">`</span>--<span class="w"> </span>trans.txt
|
||
|
||
<span class="m">5</span><span class="w"> </span>directories,<span class="w"> </span><span class="m">9</span><span class="w"> </span>files
|
||
</pre></div>
|
||
</div>
|
||
<p><strong>File descriptions</strong>:</p>
|
||
<blockquote>
|
||
<div><ul>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_char/HLG.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It is the decoding graph.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_char/tokens.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains tokens and their IDs.
|
||
It is provided only for convenience so that you can easily look up the SOS/EOS ID.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_char/words.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains words and their IDs.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">exp/pretrained.pt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains pre-trained model parameters, obtained by averaging
|
||
checkpoints from <code class="docutils literal notranslate"><span class="pre">epoch-25.pt</span></code> to <code class="docutils literal notranslate"><span class="pre">epoch-84.pt</span></code>.
|
||
Note: We have removed the optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> to reduce the file size.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">test_waves/*.wav</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains some test sound files from the Aishell <code class="docutils literal notranslate"><span class="pre">test</span></code> dataset.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
<li><p><code class="docutils literal notranslate"><span class="pre">test_waves/trans.txt</span></code></p>
|
||
<blockquote>
|
||
<div><p>It contains the reference transcripts for the sound files in <code class="docutils literal notranslate"><span class="pre">test_waves/</span></code>.</p>
|
||
</div></blockquote>
|
||
</li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<p>Information about the test sound files is listed below:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>soxi<span class="w"> </span>tmp/icefall_asr_aishell_conformer_ctc/test_waves/*.wav
|
||
|
||
Input<span class="w"> </span>File<span class="w"> </span>:<span class="w"> </span><span class="s1">'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span>
|
||
Channels<span class="w"> </span>:<span class="w"> </span><span class="m">1</span>
|
||
Sample<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span><span class="m">16000</span>
|
||
Precision<span class="w"> </span>:<span class="w"> </span><span class="m">16</span>-bit
|
||
Duration<span class="w"> </span>:<span class="w"> </span><span class="m">00</span>:00:04.20<span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">67263</span><span class="w"> </span>samples<span class="w"> </span>~<span class="w"> </span><span class="m">315</span>.295<span class="w"> </span>CDDA<span class="w"> </span>sectors
|
||
File<span class="w"> </span>Size<span class="w"> </span>:<span class="w"> </span>135k
|
||
Bit<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span>256k
|
||
Sample<span class="w"> </span>Encoding:<span class="w"> </span><span class="m">16</span>-bit<span class="w"> </span>Signed<span class="w"> </span>Integer<span class="w"> </span>PCM
|
||
|
||
|
||
Input<span class="w"> </span>File<span class="w"> </span>:<span class="w"> </span><span class="s1">'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span>
|
||
Channels<span class="w"> </span>:<span class="w"> </span><span class="m">1</span>
|
||
Sample<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span><span class="m">16000</span>
|
||
Precision<span class="w"> </span>:<span class="w"> </span><span class="m">16</span>-bit
|
||
Duration<span class="w"> </span>:<span class="w"> </span><span class="m">00</span>:00:04.12<span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">65840</span><span class="w"> </span>samples<span class="w"> </span>~<span class="w"> </span><span class="m">308</span>.625<span class="w"> </span>CDDA<span class="w"> </span>sectors
|
||
File<span class="w"> </span>Size<span class="w"> </span>:<span class="w"> </span>132k
|
||
Bit<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span>256k
|
||
Sample<span class="w"> </span>Encoding:<span class="w"> </span><span class="m">16</span>-bit<span class="w"> </span>Signed<span class="w"> </span>Integer<span class="w"> </span>PCM
|
||
|
||
|
||
Input<span class="w"> </span>File<span class="w"> </span>:<span class="w"> </span><span class="s1">'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span>
|
||
Channels<span class="w"> </span>:<span class="w"> </span><span class="m">1</span>
|
||
Sample<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span><span class="m">16000</span>
|
||
Precision<span class="w"> </span>:<span class="w"> </span><span class="m">16</span>-bit
|
||
Duration<span class="w"> </span>:<span class="w"> </span><span class="m">00</span>:00:04.00<span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="m">64000</span><span class="w"> </span>samples<span class="w"> </span>~<span class="w"> </span><span class="m">300</span><span class="w"> </span>CDDA<span class="w"> </span>sectors
|
||
File<span class="w"> </span>Size<span class="w"> </span>:<span class="w"> </span>128k
|
||
Bit<span class="w"> </span>Rate<span class="w"> </span>:<span class="w"> </span>256k
|
||
Sample<span class="w"> </span>Encoding:<span class="w"> </span><span class="m">16</span>-bit<span class="w"> </span>Signed<span class="w"> </span>Integer<span class="w"> </span>PCM
|
||
|
||
Total<span class="w"> </span>Duration<span class="w"> </span>of<span class="w"> </span><span class="m">3</span><span class="w"> </span>files:<span class="w"> </span><span class="m">00</span>:00:12.32
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="usage">
|
||
<h3>Usage<a class="headerlink" href="#usage" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/aishell/ASR
|
||
$ ./conformer_ctc/pretrained.py --help
|
||
</pre></div>
|
||
</div>
|
||
<p>The command above displays the help information.</p>
|
||
<p>It supports three decoding methods:</p>
|
||
<blockquote>
|
||
<div><ul class="simple">
|
||
<li><p>CTC decoding</p></li>
|
||
<li><p>HLG decoding</p></li>
|
||
<li><p>HLG + attention decoder rescoring</p></li>
|
||
</ul>
|
||
</div></blockquote>
|
||
<section id="ctc-decoding">
|
||
<h4>CTC decoding<a class="headerlink" href="#ctc-decoding" title="Permalink to this heading"></a></h4>
|
||
<p>CTC decoding uses only the CTC topology for decoding, without a lexicon or language model.</p>
|
||
<p>The command to run CTC decoding is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--tokens-file<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/tokens.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>ctc-decoding<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is given below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">41</span><span class="p">,</span><span class="mi">707</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">229</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'f2fd997f752ed11bbef4c306652c433e83f9cf12'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Sun Sep 19 09:41:46 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+git.33cfe45.clean'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'aishell'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'d57a873-dirty'</span><span class="p">,</span> <span class="s1">'icefall-git-date'</span><span class="p">:</span> <span class="s1">'Wed Nov 17 19:53:25 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span class="p">:</span> <span class="s1">'/ceph-hw/kangwei/code/icefall_aishell3'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span 
class="s1">'/ceph-hw/kangwei/code/k2_release/k2/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-hw/kangwei/code/lhotse/lhotse/__init__.py'</span><span class="p">},</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'tokens_file'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/tokens.txt'</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'ctc-decoding'</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">0.3</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">0.9</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">4336</span><span class="p">,</span> <span class="s1">'sound_files'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span><span class="p">,</span> <span 
class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">41</span><span class="p">,</span><span class="mi">708</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">240</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">41</span><span class="p">,</span><span class="mi">708</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">242</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">131</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">259</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">134</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">269</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">138</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">275</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">241</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">293</span><span class="p">]</span> <span class="n">Use</span> <span class="n">CTC</span> <span class="n">decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">704</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">369</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0121</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">甚</span> <span class="n">至</span> <span class="n">出</span> <span class="n">现</span> <span class="n">交</span> <span class="n">易</span> <span class="n">几</span> <span class="n">乎</span> <span class="n">停</span> <span class="n">止</span> <span class="n">的</span> <span class="n">情</span> <span class="n">况</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0122</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">一</span> <span class="n">二</span> <span class="n">线</span> <span class="n">城</span> <span class="n">市</span> <span class="n">虽</span> <span class="n">然</span> <span class="n">也</span> <span class="n">处</span> <span class="n">于</span> <span class="n">调</span> <span class="n">整</span> <span class="n">中</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0123</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">但</span> <span class="n">因</span> <span class="n">为</span> <span class="n">聚</span> <span class="n">集</span> <span class="n">了</span> <span class="n">过</span> <span class="n">多</span> <span class="n">公</span> <span class="n">共</span> <span class="n">资</span> <span class="n">源</span>
|
||
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">704</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">371</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding">
|
||
<h4>HLG decoding<a class="headerlink" href="#hlg-decoding" title="Permalink to this heading"></a></h4>
|
||
<p>HLG decoding uses the best path of the decoding lattice as the decoding result.</p>
|
||
<p>The command to run HLG decoding is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>1best<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is given below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">38</span><span class="p">,</span><span class="mi">683</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">229</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'f2fd997f752ed11bbef4c306652c433e83f9cf12'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Sun Sep 19 09:41:46 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+git.33cfe45.clean'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'aishell'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'d57a873-clean'</span><span class="p">,</span> <span class="s1">'icefall-git-date'</span><span class="p">:</span> <span class="s1">'Wed Nov 17 19:53:25 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span class="p">:</span> <span class="s1">'/ceph-hw/kangwei/code/icefall_aishell3'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span 
class="s1">'/ceph-hw/kangwei/code/k2_release/k2/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-hw/kangwei/code/lhotse/lhotse/__init__.py'</span><span class="p">},</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'tokens_file'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt'</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt'</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'1best'</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">0.3</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">0.9</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">4336</span><span class="p">,</span> <span class="s1">'sound_files'</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span><span 
class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">38</span><span class="p">,</span><span class="mi">684</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">240</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">38</span><span class="p">,</span><span class="mi">684</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">242</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">47</span><span class="p">,</span><span class="mi">651</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">259</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">47</span><span class="p">,</span><span class="mi">654</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">269</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">47</span><span class="p">,</span><span class="mi">659</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">275</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">47</span><span class="p">,</span><span class="mi">752</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">321</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_char</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">51</span><span class="p">,</span><span class="mi">887</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">340</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="n">decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">52</span><span class="p">,</span><span class="mi">102</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">370</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0121</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">甚至</span> <span class="n">出现</span> <span class="n">交易</span> <span class="n">几乎</span> <span class="n">停止</span> <span class="n">的</span> <span class="n">情况</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0122</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">一二</span> <span class="n">线</span> <span class="n">城市</span> <span class="n">虽然</span> <span class="n">也</span> <span class="n">处于</span> <span class="n">调整</span> <span class="n">中</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0123</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">但</span> <span class="n">因为</span> <span class="n">聚集</span> <span class="n">了</span> <span class="n">过多</span> <span class="n">公共</span> <span class="n">资源</span>
|
||
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">52</span><span class="p">,</span><span class="mi">102</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">372</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
<section id="hlg-decoding-attention-decoder-rescoring">
|
||
<h4>HLG decoding + attention decoder rescoring<a class="headerlink" href="#hlg-decoding-attention-decoder-rescoring" title="Permalink to this heading"></a></h4>
|
||
<p>It extracts n paths from the lattice, rescores the extracted paths with
|
||
an attention decoder. The path with the highest score is the decoding result.</p>
|
||
<p>The command to run HLG decoding + attention decoder rescoring is:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>egs/aishell/ASR
|
||
$<span class="w"> </span>./conformer_ctc/pretrained.py<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--checkpoint<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--words-file<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--HLG<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--method<span class="w"> </span>attention-decoder<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is below:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">05</span><span class="p">,</span><span class="mi">965</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">229</span><span class="p">]</span> <span class="p">{</span><span class="s1">'sample_rate'</span><span class="p">:</span> <span class="mi">16000</span><span class="p">,</span> <span class="s1">'subsampling_factor'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'feature_dim'</span><span class="p">:</span> <span class="mi">80</span><span class="p">,</span> <span class="s1">'nhead'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'attention_dim'</span><span class="p">:</span> <span class="mi">512</span><span class="p">,</span> <span class="s1">'num_decoder_layers'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span> <span class="s1">'vgg_frontend'</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span> <span class="s1">'use_feat_batchnorm'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'search_beam'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'output_beam'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'min_active_states'</span><span class="p">:</span> <span class="mi">30</span><span class="p">,</span> <span class="s1">'max_active_states'</span><span class="p">:</span> <span class="mi">10000</span><span 
class="p">,</span> <span class="s1">'use_double_scores'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'env_info'</span><span class="p">:</span> <span class="p">{</span><span class="s1">'k2-version'</span><span class="p">:</span> <span class="s1">'1.9'</span><span class="p">,</span> <span class="s1">'k2-build-type'</span><span class="p">:</span> <span class="s1">'Release'</span><span class="p">,</span> <span class="s1">'k2-with-cuda'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'k2-git-sha1'</span><span class="p">:</span> <span class="s1">'f2fd997f752ed11bbef4c306652c433e83f9cf12'</span><span class="p">,</span> <span class="s1">'k2-git-date'</span><span class="p">:</span> <span class="s1">'Sun Sep 19 09:41:46 2021'</span><span class="p">,</span> <span class="s1">'lhotse-version'</span><span class="p">:</span> <span class="s1">'0.11.0.dev+git.33cfe45.clean'</span><span class="p">,</span> <span class="s1">'torch-cuda-available'</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span> <span class="s1">'torch-cuda-version'</span><span class="p">:</span> <span class="s1">'10.1'</span><span class="p">,</span> <span class="s1">'python-version'</span><span class="p">:</span> <span class="s1">'3.8'</span><span class="p">,</span> <span class="s1">'icefall-git-branch'</span><span class="p">:</span> <span class="s1">'aishell'</span><span class="p">,</span> <span class="s1">'icefall-git-sha1'</span><span class="p">:</span> <span class="s1">'d57a873-dirty'</span><span class="p">,</span> <span class="s1">'icefall-git-date'</span><span class="p">:</span> <span class="s1">'Wed Nov 17 19:53:25 2021'</span><span class="p">,</span> <span class="s1">'icefall-path'</span><span class="p">:</span> <span class="s1">'/ceph-hw/kangwei/code/icefall_aishell3'</span><span class="p">,</span> <span class="s1">'k2-path'</span><span class="p">:</span> <span 
class="s1">'/ceph-hw/kangwei/code/k2_release/k2/k2/python/k2/__init__.py'</span><span class="p">,</span> <span class="s1">'lhotse-path'</span><span class="p">:</span> <span class="s1">'/ceph-hw/kangwei/code/lhotse/lhotse/__init__.py'</span><span class="p">},</span> <span class="s1">'checkpoint'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt'</span><span class="p">,</span> <span class="s1">'tokens_file'</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">'words_file'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt'</span><span class="p">,</span> <span class="s1">'HLG'</span><span class="p">:</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt'</span><span class="p">,</span> <span class="s1">'method'</span><span class="p">:</span> <span class="s1">'attention-decoder'</span><span class="p">,</span> <span class="s1">'num_paths'</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">'ngram_lm_scale'</span><span class="p">:</span> <span class="mf">0.3</span><span class="p">,</span> <span class="s1">'attention_decoder_scale'</span><span class="p">:</span> <span class="mf">0.9</span><span class="p">,</span> <span class="s1">'nbest_scale'</span><span class="p">:</span> <span class="mf">0.5</span><span class="p">,</span> <span class="s1">'sos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'eos_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'num_classes'</span><span class="p">:</span> <span class="mi">4336</span><span class="p">,</span> <span class="s1">'sound_files'</span><span class="p">:</span> <span class="p">[</span><span 
class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]}</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">05</span><span class="p">,</span><span class="mi">966</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">240</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">05</span><span class="p">,</span><span class="mi">966</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">242</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">16</span><span class="p">,</span><span class="mi">821</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">259</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">16</span><span class="p">,</span><span class="mi">822</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">269</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">16</span><span class="p">,</span><span class="mi">826</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">275</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">16</span><span class="p">,</span><span class="mi">916</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">321</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_char</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">21</span><span class="p">,</span><span class="mi">115</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">345</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="o">+</span> <span class="n">attention</span> <span class="n">decoder</span> <span class="n">rescoring</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">21</span><span class="p">,</span><span class="mi">888</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">370</span><span class="p">]</span>
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0121</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">甚至</span> <span class="n">出现</span> <span class="n">交易</span> <span class="n">几乎</span> <span class="n">停止</span> <span class="n">的</span> <span class="n">情况</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0122</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">一二</span> <span class="n">线</span> <span class="n">城市</span> <span class="n">虽然</span> <span class="n">也</span> <span class="n">处于</span> <span class="n">调整</span> <span class="n">中</span>
|
||
|
||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0123</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||
<span class="n">但</span> <span class="n">因为</span> <span class="n">聚集</span> <span class="n">了</span> <span class="n">过多</span> <span class="n">公共</span> <span class="n">资源</span>
|
||
|
||
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">07</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">21</span><span class="p">,</span><span class="mi">889</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">372</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||
</pre></div>
|
||
</div>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
<section id="colab-notebook">
|
||
<h2>Colab notebook<a class="headerlink" href="#colab-notebook" title="Permalink to this heading"></a></h2>
|
||
<p>We provide a Colab notebook for this recipe showing how to use a pre-trained model.</p>
|
||
<p><a class="reference external" href="https://colab.research.google.com/drive/1WnG17io5HEZ0Gn_cnh_VzK5QYOoiiklC"><img alt="aishell asr conformer ctc colab notebook" src="https://colab.research.google.com/assets/colab-badge.svg" /></a></p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>Due to limited memory provided by Colab, you have to upgrade to Colab Pro to
|
||
run <code class="docutils literal notranslate"><span class="pre">HLG</span> <span class="pre">decoding</span> <span class="pre">+</span> <span class="pre">attention</span> <span class="pre">decoder</span> <span class="pre">rescoring</span></code>.
|
||
Otherwise, you can only run <code class="docutils literal notranslate"><span class="pre">HLG</span> <span class="pre">decoding</span></code> with Colab.</p>
|
||
</div>
|
||
<p><strong>Congratulations!</strong> You have finished the aishell ASR recipe with
|
||
conformer CTC models in <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||
<p>If you want to deploy your trained model in C++, please read the following section.</p>
|
||
</section>
|
||
<section id="deployment-with-c">
|
||
<h2>Deployment with C++<a class="headerlink" href="#deployment-with-c" title="Permalink to this heading"></a></h2>
|
||
<p>This section describes how to deploy the pre-trained model in C++, without
|
||
Python dependencies.</p>
|
||
<div class="admonition hint">
|
||
<p class="admonition-title">Hint</p>
|
||
<p>At present, it does NOT support streaming decoding.</p>
|
||
</div>
|
||
<p>First, let us compile k2 from source:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span><span class="nv">$HOME</span>
|
||
$<span class="w"> </span>git<span class="w"> </span>clone<span class="w"> </span>https://github.com/k2-fsa/k2
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>k2
|
||
$<span class="w"> </span>git<span class="w"> </span>checkout<span class="w"> </span>v2.0-pre
|
||
</pre></div>
|
||
</div>
|
||
<div class="admonition caution">
|
||
<p class="admonition-title">Caution</p>
|
||
<p>You have to switch to the branch <code class="docutils literal notranslate"><span class="pre">v2.0-pre</span></code>!</p>
|
||
</div>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span>mkdir<span class="w"> </span>build-release
|
||
$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>build-release
|
||
$<span class="w"> </span>cmake<span class="w"> </span>-DCMAKE_BUILD_TYPE<span class="o">=</span>Release<span class="w"> </span>..
|
||
$<span class="w"> </span>make<span class="w"> </span>-j<span class="w"> </span>hlg_decode
|
||
|
||
<span class="c1"># You will find the binary in `./bin`, i.e. ./bin/hlg_decode</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>Now you are ready to go!</p>
|
||
<p>Assume you have run:</p>
|
||
<blockquote>
|
||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$<span class="w"> </span><span class="nb">cd</span><span class="w"> </span>k2/build-release
|
||
$<span class="w"> </span>ln<span class="w"> </span>-s<span class="w"> </span>/path/to/icefall_asr_aishell_conformer_ctc<span class="w"> </span>./
|
||
</pre></div>
|
||
</div>
|
||
</div></blockquote>
|
||
<p>To view the usage of <code class="docutils literal notranslate"><span class="pre">./bin/hlg_decode</span></code>, run:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ ./bin/hlg_decode
|
||
</pre></div>
|
||
</div>
|
||
<p>It will show you the following message:</p>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>Please<span class="w"> </span>provide<span class="w"> </span>--nn_model
|
||
|
||
This<span class="w"> </span>file<span class="w"> </span>implements<span class="w"> </span>decoding<span class="w"> </span>with<span class="w"> </span>an<span class="w"> </span>HLG<span class="w"> </span>decoding<span class="w"> </span>graph.
|
||
|
||
Usage:
|
||
<span class="w"> </span>./bin/hlg_decode<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>torch<span class="w"> </span>scripted<span class="w"> </span>pt<span class="w"> </span>file&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--hlg<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>HLG.pt&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--word_table<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>words.txt&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>foo.wav&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>&lt;path<span class="w"> </span>to<span class="w"> </span>bar.wav&gt;<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>&lt;more<span class="w"> </span>waves<span class="w"> </span><span class="k">if</span><span class="w"> </span>any&gt;
|
||
|
||
To<span class="w"> </span>see<span class="w"> </span>all<span class="w"> </span>possible<span class="w"> </span>options,<span class="w"> </span>use
|
||
<span class="w"> </span>./bin/hlg_decode<span class="w"> </span>--help
|
||
|
||
Caution:
|
||
<span class="w"> </span>-<span class="w"> </span>Only<span class="w"> </span>sound<span class="w"> </span>files<span class="w"> </span><span class="o">(</span>*.wav<span class="o">)</span><span class="w"> </span>with<span class="w"> </span>single<span class="w"> </span>channel<span class="w"> </span>are<span class="w"> </span>supported.
|
||
<span class="w"> </span>-<span class="w"> </span>It<span class="w"> </span>assumes<span class="w"> </span>the<span class="w"> </span>model<span class="w"> </span>is<span class="w"> </span>conformer_ctc/transformer.py<span class="w"> </span>from<span class="w"> </span>icefall.
|
||
<span class="w"> </span>If<span class="w"> </span>you<span class="w"> </span>use<span class="w"> </span>a<span class="w"> </span>different<span class="w"> </span>model,<span class="w"> </span>you<span class="w"> </span>have<span class="w"> </span>to<span class="w"> </span>change<span class="w"> </span>the<span class="w"> </span>code
|
||
<span class="w"> </span>related<span class="w"> </span>to<span class="w"> </span><span class="sb">`</span>model.forward<span class="sb">`</span><span class="w"> </span><span class="k">in</span><span class="w"> </span>this<span class="w"> </span>file.
|
||
</pre></div>
|
||
</div>
|
||
<section id="id2">
|
||
<h3>HLG decoding<a class="headerlink" href="#id2" title="Permalink to this heading"></a></h3>
|
||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>./bin/hlg_decode<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--use_gpu<span class="w"> </span><span class="nb">true</span><span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--nn_model<span class="w"> </span>icefall_asr_aishell_conformer_ctc/exp/cpu_jit.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--hlg<span class="w"> </span>icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>--word_table<span class="w"> </span>icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav<span class="w"> </span><span class="se">\</span>
|
||
<span class="w"> </span>icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||
</pre></div>
|
||
</div>
|
||
<p>The output is:</p>
|
||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">20.89</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">115</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Device</span><span class="p">:</span> <span class="n">cpu</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">20.89</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">124</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">wave</span> <span class="n">files</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">20.97</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">131</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Build</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">20.98</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">142</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">features</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">20.115</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">150</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">neural</span> <span class="n">network</span> <span class="n">model</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">20.693</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">165</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Compute</span> <span class="n">nnet_output</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">23.182</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">180</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Load</span> <span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_char</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">33.489</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">185</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span> <span class="n">Decoding</span>
|
||
<span class="mi">2021</span><span class="o">-</span><span class="mi">11</span><span class="o">-</span><span class="mi">18</span> <span class="mi">14</span><span class="p">:</span><span class="mi">48</span><span class="p">:</span><span class="mf">45.217</span> <span class="p">[</span><span class="n">I</span><span class="p">]</span> <span class="n">k2</span><span class="o">/</span><span class="n">torch</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hlg_decode</span><span class="o">.</span><span class="n">cu</span><span class="p">:</span><span class="mi">216</span><span class="p">:</span><span class="nb">int</span> <span class="n">main</span><span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">char</span><span class="o">**</span><span class="p">)</span>
|
||
<span class="n">Decoding</span> <span class="n">result</span><span class="p">:</span>
|
||
|
||
<span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0121</span><span class="o">.</span><span class="n">wav</span>
|
||
<span class="n">甚至</span> <span class="n">出现</span> <span class="n">交易</span> <span class="n">几乎</span> <span class="n">停止</span> <span class="n">的</span> <span class="n">情况</span>
|
||
|
||
<span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0122</span><span class="o">.</span><span class="n">wav</span>
|
||
<span class="n">一二</span> <span class="n">线</span> <span class="n">城市</span> <span class="n">虽然</span> <span class="n">也</span> <span class="n">处于</span> <span class="n">调整</span> <span class="n">中</span>
|
||
|
||
<span class="n">icefall_asr_aishell_conformer_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0123</span><span class="o">.</span><span class="n">wav</span>
|
||
<span class="n">但</span> <span class="n">因为</span> <span class="n">聚集</span> <span class="n">了</span> <span class="n">过多</span> <span class="n">公共</span> <span class="n">资源</span>
|
||
</pre></div>
|
||
</div>
|
||
<p>There is a Colab notebook showing you how to run a torch scripted model in C++.
Please see <a class="reference external" href="https://colab.research.google.com/drive/1Vh7RER7saTW01DtNbvr7CY7ovNZgmfWz?usp=sharing"><img alt="aishell asr conformer ctc torch script colab notebook" src="https://colab.research.google.com/assets/colab-badge.svg" /></a></p>
|
||
</section>
|
||
</section>
|
||
</section>
|
||
|
||
|
||
</div>
|
||
</div>
|
||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||
<a href="tdnn_lstm_ctc.html" class="btn btn-neutral float-left" title="TDNN-LSTM CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||
<a href="stateless_transducer.html" class="btn btn-neutral float-right" title="Stateless Transducer" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||
</div>
|
||
|
||
<hr/>
|
||
|
||
<div role="contentinfo">
|
||
<p>© Copyright 2021, icefall development team.</p>
|
||
</div>
|
||
|
||
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
    provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||
|
||
|
||
</footer>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
</div>
|
||
<script>
|
||
jQuery(function () {
|
||
SphinxRtdTheme.Navigation.enable(true);
|
||
});
|
||
</script>
|
||
|
||
</body>
|
||
</html> |