deploy: d99796898cc369123dfdea8a0f660fe174a33c35
4
.buildinfo
Normal file
@ -0,0 +1,4 @@
|
||||
# Sphinx build info version 1
|
||||
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
|
||||
config: cfc3e6ecc44ed7573f700065af8738a7
|
||||
tags: 645f666f9bcd5a90fca523b33c5a78b7
|
BIN
_images/aishell-conformer-ctc-tensorboard-log.jpg
Normal file
After Width: | Height: | Size: 334 KiB |
BIN
_images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg
Normal file
After Width: | Height: | Size: 426 KiB |
After Width: | Height: | Size: 441 KiB |
1
_images/device-CPU_CUDA-orange.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="122" height="20" role="img" aria-label="device: CPU | CUDA"><title>device: CPU | CUDA</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="122" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="45" height="20" fill="#555"/><rect x="45" width="77" height="20" fill="#fe7d37"/><rect width="122" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="235" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="350">device</text><text x="235" y="140" transform="scale(.1)" fill="#fff" textLength="350">device</text><text aria-hidden="true" x="825" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="670">CPU | CUDA</text><text x="825" y="140" transform="scale(.1)" fill="#fff" textLength="670">CPU | CUDA</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
BIN
_images/doc-contrib.png
Normal file
After Width: | Height: | Size: 198 KiB |
BIN
_images/hugging-face-sherpa-2.png
Normal file
After Width: | Height: | Size: 455 KiB |
BIN
_images/hugging-face-sherpa-3.png
Normal file
After Width: | Height: | Size: 392 KiB |
BIN
_images/hugging-face-sherpa.png
Normal file
After Width: | Height: | Size: 426 KiB |
1
_images/k2-gt-v1.9-blueviolet.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: >= v1.9"><title>k2: >= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= v1.9</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
BIN
_images/librispeech-conformer-ctc-tensorboard-log.png
Normal file
After Width: | Height: | Size: 422 KiB |
BIN
_images/logo.png
Normal file
After Width: | Height: | Size: 666 KiB |
1
_images/os-Linux_macOS-ff69b4.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="114" height="20" role="img" aria-label="os: Linux | macOS"><title>os: Linux | macOS</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="114" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="91" height="20" fill="#ff69b4"/><rect width="114" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">os</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">os</text><text aria-hidden="true" x="675" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="810">Linux | macOS</text><text x="675" y="140" transform="scale(.1)" fill="#fff" textLength="810">Linux | macOS</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
BIN
_images/pre-commit-check-success.png
Normal file
After Width: | Height: | Size: 153 KiB |
BIN
_images/pre-commit-check.png
Normal file
After Width: | Height: | Size: 214 KiB |
1
_images/python-gt-v3.6-blue.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: >= 3.6"><title>python: >= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">>= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">>= 3.6</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
BIN
_images/tdnn-tensorboard-log.png
Normal file
After Width: | Height: | Size: 121 KiB |
1
_images/torch-gt-v1.6.0-green.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100" height="20" role="img" aria-label="torch: >= 1.6.0"><title>torch: >= 1.6.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="100" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="39" height="20" fill="#555"/><rect x="39" width="61" height="20" fill="#97ca00"/><rect width="100" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="205" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="290">torch</text><text x="205" y="140" transform="scale(.1)" fill="#fff" textLength="290">torch</text><text aria-hidden="true" x="685" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">>= 1.6.0</text><text x="685" y="140" transform="scale(.1)" fill="#fff" textLength="510">>= 1.6.0</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
67
_sources/contributing/code-style.rst.txt
Normal file
@ -0,0 +1,67 @@
|
||||
.. _follow the code style:
|
||||
|
||||
Follow the code style
|
||||
=====================
|
||||
|
||||
We use the following tools to make the code style as consistent as possible:
|
||||
|
||||
- `black <https://github.com/psf/black>`_, to format the code
|
||||
- `flake8 <https://github.com/PyCQA/flake8>`_, to check the style and quality of the code
|
||||
- `isort <https://github.com/PyCQA/isort>`_, to sort ``imports``
|
||||
|
||||
The following versions of the above tools are used:
|
||||
|
||||
- ``black == 21.6b0``
|
||||
- ``flake8 == 3.9.2``
|
||||
- ``isort == 5.9.2``
|
||||
|
||||
After running the following commands:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ git clone https://github.com/k2-fsa/icefall
|
||||
$ cd icefall
|
||||
$ pip install pre-commit
|
||||
$ pre-commit install
|
||||
|
||||
it will run the following checks whenever you run ``git commit``, **automatically**:
|
||||
|
||||
.. figure:: images/pre-commit-check.png
|
||||
:width: 600
|
||||
:align: center
|
||||
|
||||
pre-commit hooks invoked by ``git commit`` (Failed).
|
||||
|
||||
If any of the above checks failed, your ``git commit`` was not successful.
|
||||
Please fix any issues reported by the check tools.
|
||||
|
||||
.. HINT::
|
||||
|
||||
Some of the check tools, i.e., ``black`` and ``isort`` will modify
|
||||
the files to be committed **in-place**. So please run ``git status``
|
||||
after failure to see which file has been modified by the tools
|
||||
before you make any further changes.
|
||||
|
||||
After fixing all the failures, run ``git commit`` again and
|
||||
it should succeed this time:
|
||||
|
||||
.. figure:: images/pre-commit-check-success.png
|
||||
:width: 600
|
||||
:align: center
|
||||
|
||||
pre-commit hooks invoked by ``git commit`` (Succeeded).
|
||||
|
||||
If you want to check the style of your code before ``git commit``, you
|
||||
can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd icefall
|
||||
$ pip install black==21.6b0 flake8==3.9.2 isort==5.9.2
|
||||
$ black --check your_changed_file.py
|
||||
$ black your_changed_file.py # modify it in-place
|
||||
$
|
||||
$ flake8 your_changed_file.py
|
||||
$
|
||||
$ isort --check your_changed_file.py
|
||||
$ isort your_changed_file.py  # modify it in-place
|
45
_sources/contributing/doc.rst.txt
Normal file
@ -0,0 +1,45 @@
|
||||
Contributing to Documentation
|
||||
=============================
|
||||
|
||||
We use `sphinx <https://www.sphinx-doc.org/en/master/>`_
|
||||
for documentation.
|
||||
|
||||
Before writing documentation, you have to prepare the environment:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd docs
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
After setting up the environment, you are ready to write documentation.
|
||||
Please refer to `reStructuredText Primer <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_
|
||||
if you are not familiar with ``reStructuredText``.
|
||||
|
||||
After writing some documentation, you can build the documentation **locally**
|
||||
to preview what it looks like if it is published:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd docs
|
||||
$ make html
|
||||
|
||||
The generated documentation is in ``docs/build/html`` and can be viewed
|
||||
with the following commands:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd docs/build/html
|
||||
$ python3 -m http.server
|
||||
|
||||
It will print::
|
||||
|
||||
Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
|
||||
|
||||
Open your browser, go to `<http://0.0.0.0:8000/>`_, and you will see
|
||||
the following:
|
||||
|
||||
.. figure:: images/doc-contrib.png
|
||||
:width: 600
|
||||
:align: center
|
||||
|
||||
View generated documentation locally with ``python3 -m http.server``.
|
156
_sources/contributing/how-to-create-a-recipe.rst.txt
Normal file
@ -0,0 +1,156 @@
|
||||
How to create a recipe
|
||||
======================
|
||||
|
||||
.. HINT::
|
||||
|
||||
Please read :ref:`follow the code style` to adjust your code style.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
``icefall`` is designed to be as Pythonic as possible. Please use
|
||||
Python in your recipe if possible.
|
||||
|
||||
Data Preparation
|
||||
----------------
|
||||
|
||||
We recommend that you prepare your training/test/validation datasets
|
||||
with `lhotse <https://github.com/lhotse-speech/lhotse>`_.
|
||||
|
||||
Please refer to `<https://lhotse.readthedocs.io/en/latest/index.html>`_
|
||||
for how to create a recipe in ``lhotse``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
The ``yesno`` recipe in ``lhotse`` is a very good example.
|
||||
|
||||
Please refer to `<https://github.com/lhotse-speech/lhotse/pull/380>`_,
|
||||
which shows how to add a new recipe to ``lhotse``.
|
||||
|
||||
Suppose you would like to add a recipe for a dataset named ``foo``.
|
||||
You can do the following:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs
|
||||
$ mkdir -p foo/ASR
|
||||
$ cd foo/ASR
|
||||
$ touch prepare.sh
|
||||
$ chmod +x prepare.sh
|
||||
|
||||
If your dataset is very simple, please follow
|
||||
`egs/yesno/ASR/prepare.sh <https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/prepare.sh>`_
|
||||
to write your own ``prepare.sh``.
|
||||
Otherwise, please refer to
|
||||
`egs/librispeech/ASR/prepare.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/prepare.sh>`_
|
||||
to prepare your data.
|
||||
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
Assume you have a fancy model, called ``bar`` for the ``foo`` recipe, you can
|
||||
organize your files in the following way:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/foo/ASR
|
||||
$ mkdir bar
|
||||
$ cd bar
|
||||
$ touch README.md model.py train.py decode.py asr_datamodule.py pretrained.py
|
||||
|
||||
For instance, the ``yesno`` recipe has a ``tdnn`` model and its directory structure
|
||||
looks like the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
egs/yesno/ASR/tdnn/
|
||||
|-- README.md
|
||||
|-- asr_datamodule.py
|
||||
|-- decode.py
|
||||
|-- model.py
|
||||
|-- pretrained.py
|
||||
`-- train.py
|
||||
|
||||
**File description**:
|
||||
|
||||
- ``README.md``
|
||||
|
||||
It contains information of this recipe, e.g., how to run it, what the WER is, etc.
|
||||
|
||||
- ``asr_datamodule.py``
|
||||
|
||||
It provides code to create PyTorch dataloaders with train/test/validation dataset.
|
||||
|
||||
- ``decode.py``
|
||||
|
||||
It takes as inputs the checkpoints saved during the training stage to decode the test
|
||||
dataset(s).
|
||||
|
||||
- ``model.py``
|
||||
|
||||
It contains the definition of your fancy neural network model.
|
||||
|
||||
- ``pretrained.py``
|
||||
|
||||
We can use this script to do inference with a pre-trained model.
|
||||
|
||||
- ``train.py``
|
||||
|
||||
It contains training code.
|
||||
|
||||
|
||||
.. HINT::
|
||||
|
||||
Please take a look at
|
||||
|
||||
- `egs/yesno/tdnn <https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn>`_
|
||||
- `egs/librispeech/tdnn_lstm_ctc <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/tdnn_lstm_ctc>`_
|
||||
- `egs/librispeech/conformer_ctc <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conformer_ctc>`_
|
||||
|
||||
to get a feel for what the resulting files look like.
|
||||
|
||||
.. NOTE::
|
||||
|
||||
Every model in a recipe is kept to be as self-contained as possible.
|
||||
We tolerate duplicate code among different recipes.
|
||||
|
||||
|
||||
The training stage should be invocable by:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/foo/ASR
|
||||
$ ./bar/train.py
|
||||
$ ./bar/train.py --help
|
||||
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
Please refer to
|
||||
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/decode.py>`_
|
||||
|
||||
If your model is transformer/conformer based.
|
||||
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/tdnn_lstm_ctc/decode.py>`_
|
||||
|
||||
If your model is TDNN/LSTM based, i.e., there is no attention decoder.
|
||||
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/tdnn/decode.py>`_
|
||||
|
||||
If there is no LM rescoring.
|
||||
|
||||
The decoding stage should be invocable by:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/foo/ASR
|
||||
$ ./bar/decode.py
|
||||
$ ./bar/decode.py --help
|
||||
|
||||
Pre-trained model
|
||||
-----------------
|
||||
|
||||
Please demonstrate how to use your model for inference in ``egs/foo/ASR/bar/pretrained.py``.
|
||||
If possible, please consider creating a Colab notebook to show that.
|
22
_sources/contributing/index.rst.txt
Normal file
@ -0,0 +1,22 @@
|
||||
Contributing
|
||||
============
|
||||
|
||||
Contributions to ``icefall`` are very welcome.
|
||||
There are many possible ways to make contributions and
|
||||
two of them are:
|
||||
|
||||
- To write documentation
|
||||
- To write code
|
||||
|
||||
- (1) To follow the code style in the repository
|
||||
- (2) To write a new recipe
|
||||
|
||||
In this page, we describe how to contribute documentation
|
||||
and code to ``icefall``.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
doc
|
||||
code-style
|
||||
how-to-create-a-recipe
|
13
_sources/huggingface/index.rst.txt
Normal file
@ -0,0 +1,13 @@
|
||||
Huggingface
|
||||
===========
|
||||
|
||||
This section describes how to find pre-trained models.
|
||||
It also demonstrates how to try them from within your browser
|
||||
without installing anything by using
|
||||
`Huggingface spaces <https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
pretrained-models
|
||||
spaces
|
17
_sources/huggingface/pretrained-models.rst.txt
Normal file
@ -0,0 +1,17 @@
|
||||
Pre-trained models
|
||||
==================
|
||||
|
||||
We have uploaded pre-trained models for all recipes in ``icefall``
|
||||
to `<https://huggingface.co/>`_.
|
||||
|
||||
You can find them by visiting the following link:
|
||||
|
||||
`<https://huggingface.co/models?search=icefall>`_.
|
||||
|
||||
You can also find links of pre-trained models for a specific recipe
|
||||
by looking at the corresponding ``RESULTS.md``. For instance:
|
||||
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md>`_
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/RESULTS.md>`_
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/gigaspeech/ASR/RESULTS.md>`_
|
||||
- `<https://github.com/k2-fsa/icefall/blob/master/egs/wenetspeech/ASR/RESULTS.md>`_
|
65
_sources/huggingface/spaces.rst.txt
Normal file
@ -0,0 +1,65 @@
|
||||
Huggingface spaces
|
||||
==================
|
||||
|
||||
We have integrated the server framework
|
||||
`sherpa <http://github.com/k2-fsa/sherpa>`_
|
||||
with `Huggingface spaces <https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_
|
||||
so that you can try pre-trained models from within your browser
|
||||
without the need to download or install anything.
|
||||
|
||||
All you need is a browser, which can be run on Windows, macOS, Linux, or even on your
|
||||
iPad and your phone.
|
||||
|
||||
Start your browser and visit the following address:
|
||||
|
||||
`<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_
|
||||
|
||||
and you will see a page like the following screenshot:
|
||||
|
||||
.. image:: ./pic/hugging-face-sherpa.png
|
||||
:alt: screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_
|
||||
:target: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
|
||||
|
||||
You can:
|
||||
|
||||
1. Select a language for recognition. Currently, we provide pre-trained models
|
||||
from ``icefall`` for the following languages: ``Chinese``, ``English``, and
|
||||
``Chinese+English``.
|
||||
2. After selecting the target language, you can select a pre-trained model
|
||||
corresponding to the language.
|
||||
3. Select the decoding method. Currently, it provides ``greedy search``
|
||||
and ``modified_beam_search``.
|
||||
4. If you selected ``modified_beam_search``, you can choose the number of
|
||||
active paths during the search.
|
||||
5. Either upload a file or record your speech for recognition.
|
||||
6. Click the button ``Submit for recognition``.
|
||||
7. Wait for a moment and you will get the recognition results.
|
||||
|
||||
The following screenshot shows an example when selecting ``Chinese+English``:
|
||||
|
||||
.. image:: ./pic/hugging-face-sherpa-3.png
|
||||
:alt: screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_
|
||||
:target: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
|
||||
|
||||
|
||||
In the bottom part of the page, you can find a table of examples. You can click
|
||||
one of them and then click ``Submit for recognition``.
|
||||
|
||||
.. image:: ./pic/hugging-face-sherpa-2.png
|
||||
:alt: screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_
|
||||
:target: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
|
||||
|
||||
YouTube Video
|
||||
-------------
|
||||
|
||||
We provide the following YouTube video demonstrating how to use
|
||||
`<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_.
|
||||
|
||||
.. note::
|
||||
|
||||
To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe to
|
||||
the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
|
||||
|
||||
`<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
|
||||
|
||||
.. youtube:: ElN3r9dkKE4
|
26
_sources/index.rst.txt
Normal file
@ -0,0 +1,26 @@
|
||||
.. icefall documentation master file, created by
|
||||
sphinx-quickstart on Mon Aug 23 16:07:39 2021.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Icefall
|
||||
=======
|
||||
|
||||
.. image:: _static/logo.png
|
||||
:alt: icefall logo
|
||||
:width: 168px
|
||||
:align: center
|
||||
:target: https://github.com/k2-fsa/icefall
|
||||
|
||||
|
||||
Documentation for `icefall <https://github.com/k2-fsa/icefall>`_, containing
|
||||
speech recognition recipes using `k2 <https://github.com/k2-fsa/k2>`_.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
installation/index
|
||||
recipes/index
|
||||
contributing/index
|
||||
huggingface/index
|
492
_sources/installation/index.rst.txt
Normal file
@ -0,0 +1,492 @@
|
||||
.. _install icefall:
|
||||
|
||||
Installation
|
||||
============
|
||||
|
||||
- |os|
|
||||
- |device|
|
||||
- |python_versions|
|
||||
- |torch_versions|
|
||||
- |k2_versions|
|
||||
|
||||
.. |os| image:: ./images/os-Linux_macOS-ff69b4.svg
|
||||
:alt: Supported operating systems
|
||||
|
||||
.. |device| image:: ./images/device-CPU_CUDA-orange.svg
|
||||
:alt: Supported devices
|
||||
|
||||
.. |python_versions| image:: ./images/python-gt-v3.6-blue.svg
|
||||
:alt: Supported python versions
|
||||
|
||||
.. |torch_versions| image:: ./images/torch-gt-v1.6.0-green.svg
|
||||
:alt: Supported PyTorch versions
|
||||
|
||||
.. |k2_versions| image:: ./images/k2-gt-v1.9-blueviolet.svg
|
||||
:alt: Supported k2 versions
|
||||
|
||||
``icefall`` depends on `k2 <https://github.com/k2-fsa/k2>`_ and
|
||||
`lhotse <https://github.com/lhotse-speech/lhotse>`_.
|
||||
|
||||
We recommend you to use the following steps to install the dependencies.
|
||||
|
||||
- (0) Install PyTorch and torchaudio
|
||||
- (1) Install k2
|
||||
- (2) Install lhotse
|
||||
|
||||
.. caution::
|
||||
|
||||
Installation order matters.
|
||||
|
||||
(0) Install PyTorch and torchaudio
|
||||
----------------------------------
|
||||
|
||||
Please refer `<https://pytorch.org/>`_ to install PyTorch
|
||||
and torchaudio.
|
||||
|
||||
|
||||
(1) Install k2
|
||||
--------------
|
||||
|
||||
Please refer to `<https://k2-fsa.github.io/k2/installation/index.html>`_
|
||||
to install ``k2``.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You need to install ``k2`` with a version at least **v1.9**.
|
||||
|
||||
.. HINT::
|
||||
|
||||
If you have already installed PyTorch and don't want to replace it,
|
||||
please install a version of ``k2`` that is compiled against the version
|
||||
of PyTorch you are using.
|
||||
|
||||
(2) Install lhotse
|
||||
------------------
|
||||
|
||||
Please refer to `<https://lhotse.readthedocs.io/en/latest/getting-started.html#installation>`_
|
||||
to install ``lhotse``.
|
||||
|
||||
|
||||
.. hint::
|
||||
|
||||
We strongly recommend you to use::
|
||||
|
||||
pip install git+https://github.com/lhotse-speech/lhotse
|
||||
|
||||
to install the latest version of lhotse.
|
||||
|
||||
|
||||
(3) Download icefall
|
||||
--------------------
|
||||
|
||||
``icefall`` is a collection of Python scripts; what you need is to download it
|
||||
and set the environment variable ``PYTHONPATH`` to point to it.
|
||||
|
||||
Assume you want to place ``icefall`` in the folder ``/tmp``. The
|
||||
following commands show you how to setup ``icefall``:
|
||||
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd /tmp
|
||||
git clone https://github.com/k2-fsa/icefall
|
||||
cd icefall
|
||||
pip install -r requirements.txt
|
||||
export PYTHONPATH=/tmp/icefall:$PYTHONPATH
|
||||
|
||||
.. HINT::
|
||||
|
||||
You can put several versions of ``icefall`` in the same virtual environment.
|
||||
To switch among different versions of ``icefall``, just set ``PYTHONPATH``
|
||||
to point to the version you want.
|
||||
|
||||
|
||||
Installation example
|
||||
--------------------
|
||||
|
||||
The following shows an example about setting up the environment.
|
||||
|
||||
|
||||
(1) Create a virtual environment
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ virtualenv -p python3.8 test-icefall
|
||||
|
||||
created virtual environment CPython3.8.6.final.0-64 in 1540ms
|
||||
creator CPython3Posix(dest=/ceph-fj/fangjun/test-icefall, clear=False, no_vcs_ignore=False, global=False)
|
||||
seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=copy, app_data_dir=/root/fangjun/.local/share/v
|
||||
irtualenv)
|
||||
added seed packages: pip==21.1.3, setuptools==57.4.0, wheel==0.36.2
|
||||
activators BashActivator,CShellActivator,FishActivator,PowerShellActivator,PythonActivator,XonshActivator
|
||||
|
||||
|
||||
(2) Activate your virtual environment
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ source test-icefall/bin/activate
|
||||
|
||||
(3) Install k2
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ pip install k2==1.4.dev20210822+cpu.torch1.9.0 -f https://k2-fsa.org/nightly/index.html
|
||||
|
||||
Looking in links: https://k2-fsa.org/nightly/index.html
|
||||
Collecting k2==1.4.dev20210822+cpu.torch1.9.0
|
||||
Downloading https://k2-fsa.org/nightly/whl/k2-1.4.dev20210822%2Bcpu.torch1.9.0-cp38-cp38-linux_x86_64.whl (1.6 MB)
|
||||
|________________________________| 1.6 MB 185 kB/s
|
||||
Collecting graphviz
|
||||
Downloading graphviz-0.17-py3-none-any.whl (18 kB)
|
||||
Collecting torch==1.9.0
|
||||
Using cached torch-1.9.0-cp38-cp38-manylinux1_x86_64.whl (831.4 MB)
|
||||
Collecting typing-extensions
|
||||
Using cached typing_extensions-3.10.0.0-py3-none-any.whl (26 kB)
|
||||
Installing collected packages: typing-extensions, torch, graphviz, k2
|
||||
Successfully installed graphviz-0.17 k2-1.4.dev20210822+cpu.torch1.9.0 torch-1.9.0 typing-extensions-3.10.0.0
|
||||
|
||||
.. WARNING::
|
||||
|
||||
We choose to install a CPU version of k2 for testing. You would probably want to install
|
||||
a CUDA version of k2.
|
||||
|
||||
|
||||
(4) Install lhotse
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ pip install git+https://github.com/lhotse-speech/lhotse
|
||||
|
||||
Collecting git+https://github.com/lhotse-speech/lhotse
|
||||
Cloning https://github.com/lhotse-speech/lhotse to /tmp/pip-req-build-7b1b76ge
|
||||
Running command git clone -q https://github.com/lhotse-speech/lhotse /tmp/pip-req-build-7b1b76ge
|
||||
Collecting audioread>=2.1.9
|
||||
Using cached audioread-2.1.9-py3-none-any.whl
|
||||
Collecting SoundFile>=0.10
|
||||
Using cached SoundFile-0.10.3.post1-py2.py3-none-any.whl (21 kB)
|
||||
Collecting click>=7.1.1
|
||||
Using cached click-8.0.1-py3-none-any.whl (97 kB)
|
||||
Collecting cytoolz>=0.10.1
|
||||
Using cached cytoolz-0.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.9 MB)
|
||||
Collecting dataclasses
|
||||
Using cached dataclasses-0.6-py3-none-any.whl (14 kB)
|
||||
Collecting h5py>=2.10.0
|
||||
Downloading h5py-3.4.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.5 MB)
|
||||
|________________________________| 4.5 MB 684 kB/s
|
||||
Collecting intervaltree>=3.1.0
|
||||
Using cached intervaltree-3.1.0-py2.py3-none-any.whl
|
||||
Collecting lilcom>=1.1.0
|
||||
Using cached lilcom-1.1.1-cp38-cp38-linux_x86_64.whl
|
||||
Collecting numpy>=1.18.1
|
||||
Using cached numpy-1.21.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.8 MB)
|
||||
Collecting packaging
|
||||
Using cached packaging-21.0-py3-none-any.whl (40 kB)
|
||||
Collecting pyyaml>=5.3.1
|
||||
Using cached PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl (662 kB)
|
||||
Collecting tqdm
|
||||
Downloading tqdm-4.62.1-py2.py3-none-any.whl (76 kB)
|
||||
|________________________________| 76 kB 2.7 MB/s
|
||||
Collecting torchaudio==0.9.0
|
||||
Downloading torchaudio-0.9.0-cp38-cp38-manylinux1_x86_64.whl (1.9 MB)
|
||||
|________________________________| 1.9 MB 73.1 MB/s
|
||||
Requirement already satisfied: torch==1.9.0 in ./test-icefall/lib/python3.8/site-packages (from torchaudio==0.9.0->lhotse===0.8.0.dev
|
||||
-2a1410b-clean) (1.9.0)
|
||||
Requirement already satisfied: typing-extensions in ./test-icefall/lib/python3.8/site-packages (from torch==1.9.0->torchaudio==0.9.0-
|
||||
>lhotse===0.8.0.dev-2a1410b-clean) (3.10.0.0)
|
||||
Collecting toolz>=0.8.0
|
||||
Using cached toolz-0.11.1-py3-none-any.whl (55 kB)
|
||||
Collecting sortedcontainers<3.0,>=2.0
|
||||
Using cached sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)
|
||||
Collecting cffi>=1.0
|
||||
Using cached cffi-1.14.6-cp38-cp38-manylinux1_x86_64.whl (411 kB)
|
||||
Collecting pycparser
|
||||
Using cached pycparser-2.20-py2.py3-none-any.whl (112 kB)
|
||||
Collecting pyparsing>=2.0.2
|
||||
Using cached pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)
|
||||
Building wheels for collected packages: lhotse
|
||||
Building wheel for lhotse (setup.py) ... done
|
||||
Created wheel for lhotse: filename=lhotse-0.8.0.dev_2a1410b_clean-py3-none-any.whl size=342242 sha256=f683444afa4dc0881133206b4646a
|
||||
9d0f774224cc84000f55d0a67f6e4a37997
|
||||
Stored in directory: /tmp/pip-ephem-wheel-cache-ftu0qysz/wheels/7f/7a/8e/a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f
|
||||
WARNING: Built wheel for lhotse is invalid: Metadata 1.2 mandates PEP 440 version, but '0.8.0.dev-2a1410b-clean' is not
|
||||
Failed to build lhotse
|
||||
Installing collected packages: pycparser, toolz, sortedcontainers, pyparsing, numpy, cffi, tqdm, torchaudio, SoundFile, pyyaml, packa
|
||||
ging, lilcom, intervaltree, h5py, dataclasses, cytoolz, click, audioread, lhotse
|
||||
Running setup.py install for lhotse ... done
|
||||
DEPRECATION: lhotse was installed using the legacy 'setup.py install' method, because a wheel could not be built for it. A possible
|
||||
replacement is to fix the wheel build issue reported above. You can find discussion regarding this at https://github.com/pypa/pip/is
|
||||
sues/8368.
|
||||
Successfully installed SoundFile-0.10.3.post1 audioread-2.1.9 cffi-1.14.6 click-8.0.1 cytoolz-0.11.0 dataclasses-0.6 h5py-3.4.0 inter
|
||||
valtree-3.1.0 lhotse-0.8.0.dev-2a1410b-clean lilcom-1.1.1 numpy-1.21.2 packaging-21.0 pycparser-2.20 pyparsing-2.4.7 pyyaml-5.4.1 sor
|
||||
tedcontainers-2.4.0 toolz-0.11.1 torchaudio-0.9.0 tqdm-4.62.1
|
||||
|
||||
(5) Download icefall
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd /tmp
|
||||
$ git clone https://github.com/k2-fsa/icefall
|
||||
|
||||
Cloning into 'icefall'...
|
||||
remote: Enumerating objects: 500, done.
|
||||
remote: Counting objects: 100% (500/500), done.
|
||||
remote: Compressing objects: 100% (308/308), done.
|
||||
remote: Total 500 (delta 263), reused 307 (delta 102), pack-reused 0
|
||||
Receiving objects: 100% (500/500), 172.49 KiB | 385.00 KiB/s, done.
|
||||
Resolving deltas: 100% (263/263), done.
|
||||
|
||||
$ cd icefall
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
Collecting kaldilm
|
||||
Downloading kaldilm-1.8.tar.gz (48 kB)
|
||||
|________________________________| 48 kB 574 kB/s
|
||||
Collecting kaldialign
|
||||
Using cached kaldialign-0.2-cp38-cp38-linux_x86_64.whl
|
||||
Collecting sentencepiece>=0.1.96
|
||||
Using cached sentencepiece-0.1.96-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
|
||||
Collecting tensorboard
|
||||
Using cached tensorboard-2.6.0-py3-none-any.whl (5.6 MB)
|
||||
Requirement already satisfied: setuptools>=41.0.0 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r
|
||||
requirements.txt (line 4)) (57.4.0)
|
||||
Collecting absl-py>=0.4
|
||||
Using cached absl_py-0.13.0-py3-none-any.whl (132 kB)
|
||||
Collecting google-auth-oauthlib<0.5,>=0.4.1
|
||||
Using cached google_auth_oauthlib-0.4.5-py2.py3-none-any.whl (18 kB)
|
||||
Collecting grpcio>=1.24.3
|
||||
Using cached grpcio-1.39.0-cp38-cp38-manylinux2014_x86_64.whl (4.3 MB)
|
||||
Requirement already satisfied: wheel>=0.26 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r require
|
||||
ments.txt (line 4)) (0.36.2)
|
||||
Requirement already satisfied: numpy>=1.12.0 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r requi
|
||||
rements.txt (line 4)) (1.21.2)
|
||||
Collecting protobuf>=3.6.0
|
||||
Using cached protobuf-3.17.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)
|
||||
Collecting werkzeug>=0.11.15
|
||||
Using cached Werkzeug-2.0.1-py3-none-any.whl (288 kB)
|
||||
Collecting tensorboard-data-server<0.7.0,>=0.6.0
|
||||
Using cached tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl (4.9 MB)
|
||||
Collecting google-auth<2,>=1.6.3
|
||||
Downloading google_auth-1.35.0-py2.py3-none-any.whl (152 kB)
|
||||
|________________________________| 152 kB 1.4 MB/s
|
||||
Collecting requests<3,>=2.21.0
|
||||
Using cached requests-2.26.0-py2.py3-none-any.whl (62 kB)
|
||||
Collecting tensorboard-plugin-wit>=1.6.0
|
||||
Using cached tensorboard_plugin_wit-1.8.0-py3-none-any.whl (781 kB)
|
||||
Collecting markdown>=2.6.8
|
||||
Using cached Markdown-3.3.4-py3-none-any.whl (97 kB)
|
||||
Collecting six
|
||||
Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)
|
||||
Collecting cachetools<5.0,>=2.0.0
|
||||
Using cached cachetools-4.2.2-py3-none-any.whl (11 kB)
|
||||
Collecting rsa<5,>=3.1.4
|
||||
Using cached rsa-4.7.2-py3-none-any.whl (34 kB)
|
||||
Collecting pyasn1-modules>=0.2.1
|
||||
Using cached pyasn1_modules-0.2.8-py2.py3-none-any.whl (155 kB)
|
||||
Collecting requests-oauthlib>=0.7.0
|
||||
Using cached requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
|
||||
Collecting pyasn1<0.5.0,>=0.4.6
|
||||
Using cached pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
|
||||
Collecting urllib3<1.27,>=1.21.1
|
||||
Using cached urllib3-1.26.6-py2.py3-none-any.whl (138 kB)
|
||||
Collecting certifi>=2017.4.17
|
||||
Using cached certifi-2021.5.30-py2.py3-none-any.whl (145 kB)
|
||||
Collecting charset-normalizer~=2.0.0
|
||||
Using cached charset_normalizer-2.0.4-py3-none-any.whl (36 kB)
|
||||
Collecting idna<4,>=2.5
|
||||
Using cached idna-3.2-py3-none-any.whl (59 kB)
|
||||
Collecting oauthlib>=3.0.0
|
||||
Using cached oauthlib-3.1.1-py2.py3-none-any.whl (146 kB)
|
||||
Building wheels for collected packages: kaldilm
|
||||
Building wheel for kaldilm (setup.py) ... done
|
||||
Created wheel for kaldilm: filename=kaldilm-1.8-cp38-cp38-linux_x86_64.whl size=897233 sha256=eccb906cafcd45bf9a7e1a1718e4534254bfb
|
||||
f4c0d0cbc66eee6c88d68a63862
|
||||
Stored in directory: /root/fangjun/.cache/pip/wheels/85/7d/63/f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9
|
||||
Successfully built kaldilm
|
||||
Installing collected packages: urllib3, pyasn1, idna, charset-normalizer, certifi, six, rsa, requests, pyasn1-modules, oauthlib, cach
|
||||
etools, requests-oauthlib, google-auth, werkzeug, tensorboard-plugin-wit, tensorboard-data-server, protobuf, markdown, grpcio, google
|
||||
-auth-oauthlib, absl-py, tensorboard, sentencepiece, kaldilm, kaldialign
|
||||
Successfully installed absl-py-0.13.0 cachetools-4.2.2 certifi-2021.5.30 charset-normalizer-2.0.4 google-auth-1.35.0 google-auth-oaut
|
||||
hlib-0.4.5 grpcio-1.39.0 idna-3.2 kaldialign-0.2 kaldilm-1.8 markdown-3.3.4 oauthlib-3.1.1 protobuf-3.17.3 pyasn1-0.4.8 pyasn1-module
|
||||
s-0.2.8 requests-2.26.0 requests-oauthlib-1.3.0 rsa-4.7.2 sentencepiece-0.1.96 six-1.16.0 tensorboard-2.6.0 tensorboard-data-server-0
|
||||
.6.1 tensorboard-plugin-wit-1.8.0 urllib3-1.26.6 werkzeug-2.0.1
|
||||
|
||||
|
||||
Test Your Installation
|
||||
----------------------
|
||||
|
||||
To test that your installation is successful, let us run
|
||||
the `yesno recipe <https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR>`_
|
||||
on CPU.
|
||||
|
||||
Data preparation
|
||||
~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export PYTHONPATH=/tmp/icefall:$PYTHONPATH
|
||||
$ cd /tmp/icefall
|
||||
$ cd egs/yesno/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The log of running ``./prepare.sh`` is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-23 19:27:26 (prepare.sh:24:main) dl_dir: /tmp/icefall/egs/yesno/ASR/download
|
||||
2021-08-23 19:27:26 (prepare.sh:27:main) stage 0: Download data
|
||||
Downloading waves_yesno.tar.gz: 4.49MB [00:03, 1.39MB/s]
|
||||
2021-08-23 19:27:30 (prepare.sh:36:main) Stage 1: Prepare yesno manifest
|
||||
2021-08-23 19:27:31 (prepare.sh:42:main) Stage 2: Compute fbank for yesno
|
||||
2021-08-23 19:27:32,803 INFO [compute_fbank_yesno.py:52] Processing train
|
||||
Extracting and storing features: 100%|_______________________________________________________________| 90/90 [00:01<00:00, 80.57it/s]
|
||||
2021-08-23 19:27:34,085 INFO [compute_fbank_yesno.py:52] Processing test
|
||||
Extracting and storing features: 100%|______________________________________________________________| 30/30 [00:00<00:00, 248.21it/s]
|
||||
2021-08-23 19:27:34 (prepare.sh:48:main) Stage 3: Prepare lang
|
||||
2021-08-23 19:27:35 (prepare.sh:63:main) Stage 4: Prepare G
|
||||
/tmp/pip-install-fcordre9/kaldilm_6899d26f2d684ad48f21025950cd2866/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Rea
|
||||
d(std::istream&):79
|
||||
[I] Reading \data\ section.
|
||||
/tmp/pip-install-fcordre9/kaldilm_6899d26f2d684ad48f21025950cd2866/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Rea
|
||||
d(std::istream&):140
|
||||
[I] Reading \1-grams: section.
|
||||
2021-08-23 19:27:35 (prepare.sh:89:main) Stage 5: Compile HLG
|
||||
2021-08-23 19:27:35,928 INFO [compile_hlg.py:120] Processing data/lang_phone
|
||||
2021-08-23 19:27:35,929 INFO [lexicon.py:116] Converting L.pt to Linv.pt
|
||||
2021-08-23 19:27:35,931 INFO [compile_hlg.py:48] Building ctc_topo. max_token_id: 3
|
||||
2021-08-23 19:27:35,932 INFO [compile_hlg.py:52] Loading G.fst.txt
|
||||
2021-08-23 19:27:35,932 INFO [compile_hlg.py:62] Intersecting L and G
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:64] LG shape: (4, None)
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:66] Connecting LG
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:68] LG shape after k2.connect: (4, None)
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:70] <class 'torch.Tensor'>
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:71] Determinizing LG
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:74] <class '_k2.RaggedInt'>
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:76] Connecting LG after k2.determinize
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:79] Removing disambiguation symbols on LG
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:87] LG shape after k2.remove_epsilon: (6, None)
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:92] Arc sorting LG
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:95] Composing H and LG
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:102] Connecting LG
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:105] Arc sorting LG
|
||||
2021-08-23 19:27:35,936 INFO [compile_hlg.py:107] HLG.shape: (8, None)
|
||||
2021-08-23 19:27:35,936 INFO [compile_hlg.py:123] Saving HLG.pt to data/lang_phone
|
||||
|
||||
|
||||
Training
|
||||
~~~~~~~~
|
||||
|
||||
Now let us run the training part:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES=""
|
||||
$ ./tdnn/train.py
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
We use ``export CUDA_VISIBLE_DEVICES=""`` so that ``icefall`` uses CPU
|
||||
even if there are GPUs available.
|
||||
|
||||
The training log is given below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-23 19:30:31,072 INFO [train.py:465] Training started
|
||||
2021-08-23 19:30:31,072 INFO [train.py:466] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01,
|
||||
'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, '
|
||||
best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_doub
|
||||
le_scores': True, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 15, 'feature_dir': PosixPath('data/fbank'
|
||||
), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0
|
||||
, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
|
||||
2021-08-23 19:30:31,074 INFO [lexicon.py:113] Loading pre-compiled data/lang_phone/Linv.pt
|
||||
2021-08-23 19:30:31,098 INFO [asr_datamodule.py:146] About to get train cuts
|
||||
2021-08-23 19:30:31,098 INFO [asr_datamodule.py:240] About to get train cuts
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:149] About to create train dataset
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:200] Using SingleCutSampler.
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:206] About to create train dataloader
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:219] About to get test cuts
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:246] About to get test cuts
|
||||
2021-08-23 19:30:31,357 INFO [train.py:416] Epoch 0, batch 0, batch avg loss 1.0789, total avg loss: 1.0789, batch size: 4
|
||||
2021-08-23 19:30:31,848 INFO [train.py:416] Epoch 0, batch 10, batch avg loss 0.5356, total avg loss: 0.7556, batch size: 4
|
||||
2021-08-23 19:30:32,301 INFO [train.py:432] Epoch 0, valid loss 0.9972, best valid loss: 0.9972 best valid epoch: 0
|
||||
2021-08-23 19:30:32,805 INFO [train.py:416] Epoch 0, batch 20, batch avg loss 0.2436, total avg loss: 0.5717, batch size: 3
|
||||
2021-08-23 19:30:33,109 INFO [train.py:432] Epoch 0, valid loss 0.4167, best valid loss: 0.4167 best valid epoch: 0
|
||||
2021-08-23 19:30:33,121 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-0.pt
|
||||
2021-08-23 19:30:33,325 INFO [train.py:416] Epoch 1, batch 0, batch avg loss 0.2214, total avg loss: 0.2214, batch size: 5
|
||||
2021-08-23 19:30:33,798 INFO [train.py:416] Epoch 1, batch 10, batch avg loss 0.0781, total avg loss: 0.1343, batch size: 5
|
||||
2021-08-23 19:30:34,065 INFO [train.py:432] Epoch 1, valid loss 0.0859, best valid loss: 0.0859 best valid epoch: 1
|
||||
2021-08-23 19:30:34,556 INFO [train.py:416] Epoch 1, batch 20, batch avg loss 0.0421, total avg loss: 0.0975, batch size: 3
|
||||
2021-08-23 19:30:34,810 INFO [train.py:432] Epoch 1, valid loss 0.0431, best valid loss: 0.0431 best valid epoch: 1
|
||||
2021-08-23 19:30:34,824 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-1.pt
|
||||
|
||||
... ...
|
||||
|
||||
2021-08-23 19:30:49,657 INFO [train.py:416] Epoch 13, batch 0, batch avg loss 0.0109, total avg loss: 0.0109, batch size: 5
|
||||
2021-08-23 19:30:49,984 INFO [train.py:416] Epoch 13, batch 10, batch avg loss 0.0093, total avg loss: 0.0096, batch size: 4
|
||||
2021-08-23 19:30:50,239 INFO [train.py:432] Epoch 13, valid loss 0.0104, best valid loss: 0.0101 best valid epoch: 12
|
||||
2021-08-23 19:30:50,569 INFO [train.py:416] Epoch 13, batch 20, batch avg loss 0.0092, total avg loss: 0.0096, batch size: 2
|
||||
2021-08-23 19:30:50,819 INFO [train.py:432] Epoch 13, valid loss 0.0101, best valid loss: 0.0101 best valid epoch: 13
|
||||
2021-08-23 19:30:50,835 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-13.pt
|
||||
2021-08-23 19:30:51,024 INFO [train.py:416] Epoch 14, batch 0, batch avg loss 0.0105, total avg loss: 0.0105, batch size: 5
|
||||
2021-08-23 19:30:51,317 INFO [train.py:416] Epoch 14, batch 10, batch avg loss 0.0099, total avg loss: 0.0097, batch size: 4
|
||||
2021-08-23 19:30:51,552 INFO [train.py:432] Epoch 14, valid loss 0.0108, best valid loss: 0.0101 best valid epoch: 13
|
||||
2021-08-23 19:30:51,869 INFO [train.py:416] Epoch 14, batch 20, batch avg loss 0.0096, total avg loss: 0.0097, batch size: 5
|
||||
2021-08-23 19:30:52,107 INFO [train.py:432] Epoch 14, valid loss 0.0102, best valid loss: 0.0101 best valid epoch: 13
|
||||
2021-08-23 19:30:52,126 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-14.pt
|
||||
2021-08-23 19:30:52,128 INFO [train.py:537] Done!
|
||||
|
||||
Decoding
|
||||
~~~~~~~~
|
||||
|
||||
Let us use the trained model to decode the test set:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ ./tdnn/decode.py
|
||||
|
||||
The decoding log is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-23 19:35:30,192 INFO [decode.py:249] Decoding started
|
||||
2021-08-23 19:35:30,192 INFO [decode.py:250] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
|
||||
2021-08-23 19:35:30,193 INFO [lexicon.py:113] Loading pre-compiled data/lang_phone/Linv.pt
|
||||
2021-08-23 19:35:30,213 INFO [decode.py:259] device: cpu
|
||||
2021-08-23 19:35:30,217 INFO [decode.py:279] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt']
|
||||
/tmp/icefall/icefall/checkpoint.py:146: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch.
|
||||
It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
|
||||
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at /pytorch/aten/src/ATen/native/BinaryOps.cpp:450.)
|
||||
avg[k] //= n
|
||||
2021-08-23 19:35:30,220 INFO [asr_datamodule.py:219] About to get test cuts
|
||||
2021-08-23 19:35:30,220 INFO [asr_datamodule.py:246] About to get test cuts
|
||||
2021-08-23 19:35:30,409 INFO [decode.py:190] batch 0/8, cuts processed until now is 4
|
||||
2021-08-23 19:35:30,571 INFO [decode.py:228] The transcripts are stored in tdnn/exp/recogs-test_set.txt
|
||||
2021-08-23 19:35:30,572 INFO [utils.py:317] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
|
||||
2021-08-23 19:35:30,573 INFO [decode.py:236] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
|
||||
2021-08-23 19:35:30,573 INFO [decode.py:299] Done!
|
||||
|
||||
**Congratulations!** You have successfully set up the environment and have run the first recipe in ``icefall``.
|
||||
|
||||
Have fun with ``icefall``!
|
||||
|
||||
YouTube Video
|
||||
-------------
|
||||
|
||||
We provide the following YouTube video showing how to install ``icefall``.
|
||||
It also shows how to debug various problems that you may encounter while
|
||||
using ``icefall``.
|
||||
|
||||
.. note::
|
||||
|
||||
To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe to
|
||||
the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
|
||||
|
||||
`<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
|
||||
|
||||
.. youtube:: LVmrBD0tLfE
|
747
_sources/recipes/aishell/conformer_ctc.rst.txt
Normal file
@ -0,0 +1,747 @@
|
||||
Conformer CTC
|
||||
=============
|
||||
|
||||
This tutorial shows you how to run a conformer ctc model
|
||||
with the `Aishell <https://www.openslr.org/33>`_ dataset.
|
||||
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have set up
|
||||
the environment for ``icefall``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
We recommend you to use a GPU or several GPUs to run this recipe.
|
||||
|
||||
In this tutorial, you will learn:
|
||||
|
||||
- (1) How to prepare data for training and decoding
|
||||
- (2) How to start the training, either with a single GPU or multiple GPUs
|
||||
- (3) How to do decoding after training, with ctc-decoding, 1best and attention decoder rescoring
|
||||
- (4) How to use a pre-trained model, provided by us
|
||||
|
||||
Data preparation
|
||||
----------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||
All you need to do is to run it.
|
||||
|
||||
The data preparation contains several stages, you can use the following two
|
||||
options:
|
||||
|
||||
- ``--stage``
|
||||
- ``--stop-stage``
|
||||
|
||||
to control which stage(s) should be run. By default, all stages are executed.
|
||||
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./prepare.sh --stage 0 --stop-stage 0
|
||||
|
||||
means to run only stage 0.
|
||||
|
||||
To run stage 2 to stage 5, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./prepare.sh --stage 2 --stop-stage 5
|
||||
|
||||
.. HINT::
|
||||
|
||||
If you have pre-downloaded the `Aishell <https://www.openslr.org/33>`_
|
||||
dataset and the `musan <http://www.openslr.org/17/>`_ dataset, say,
|
||||
they are saved in ``/tmp/aishell`` and ``/tmp/musan``, you can modify
|
||||
the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
|
||||
``./prepare.sh`` won't re-download them.
|
||||
|
||||
.. HINT::
|
||||
|
||||
A 3-gram language model will be downloaded from huggingface, we assume you have
|
||||
installed and initialized ``git-lfs``. If not, you could install ``git-lfs`` by
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ sudo apt-get install git-lfs
|
||||
$ git-lfs install
|
||||
|
||||
If you don't have the ``sudo`` permission, you could download the
|
||||
`git-lfs binary <https://github.com/git-lfs/git-lfs/releases>`_ here, then add it to your ``PATH``.
|
||||
|
||||
.. NOTE::
|
||||
|
||||
All files generated by ``./prepare.sh``, e.g., features, lexicon, etc.,
|
||||
are saved in the ``./data`` directory.
|
||||
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
Configurable options
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/train.py --help
|
||||
|
||||
shows you the training options that can be passed from the commandline.
|
||||
The following options are used quite often:
|
||||
|
||||
- ``--exp-dir``
|
||||
|
||||
The experiment folder to save logs and model checkpoints,
|
||||
default ``./conformer_ctc/exp``.
|
||||
|
||||
- ``--num-epochs``
|
||||
|
||||
It is the number of epochs to train. For instance,
|
||||
``./conformer_ctc/train.py --num-epochs 30`` trains for 30 epochs
|
||||
and generates ``epoch-0.pt``, ``epoch-1.pt``, ..., ``epoch-29.pt``
|
||||
in the folder set by ``--exp-dir``.
|
||||
|
||||
- ``--start-epoch``
|
||||
|
||||
It's used to resume training.
|
||||
``./conformer_ctc/train.py --start-epoch 10`` loads the
|
||||
checkpoint ``./conformer_ctc/exp/epoch-9.pt`` and starts
|
||||
training from epoch 10, based on the state from epoch 9.
|
||||
|
||||
- ``--world-size``
|
||||
|
||||
It is used for multi-GPU single-machine DDP training.
|
||||
|
||||
- (a) If it is 1, then no DDP training is used.
|
||||
|
||||
- (b) If it is 2, then GPU 0 and GPU 1 are used for DDP training.
|
||||
|
||||
The following shows some use cases with it.
|
||||
|
||||
**Use case 1**: You have 4 GPUs, but you only want to use GPU 0 and
|
||||
GPU 2 for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,2"
|
||||
$ ./conformer_ctc/train.py --world-size 2
|
||||
|
||||
**Use case 2**: You have 4 GPUs and you want to use all of them
|
||||
for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/train.py --world-size 4
|
||||
|
||||
**Use case 3**: You have 4 GPUs but you only want to use GPU 3
|
||||
for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="3"
|
||||
$ ./conformer_ctc/train.py --world-size 1
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
Only multi-GPU single-machine DDP training is implemented at present.
|
||||
Multi-GPU multi-machine DDP training will be added later.
|
||||
|
||||
- ``--max-duration``
|
||||
|
||||
It specifies the number of seconds over all utterances in a
|
||||
batch, before **padding**.
|
||||
If you encounter CUDA OOM, please reduce it. For instance, if
|
||||
you are using a V100 NVIDIA GPU, we recommend you to set it to ``200``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
Due to padding, the number of seconds of all utterances in a
|
||||
batch will usually be larger than ``--max-duration``.
|
||||
|
||||
A larger value for ``--max-duration`` may cause OOM during training,
|
||||
while a smaller value may increase the training time. You have to
|
||||
tune it.
|
||||
|
||||
|
||||
Pre-configured options
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are some training options, e.g., weight decay,
|
||||
number of warmup steps, etc,
|
||||
that are not passed from the commandline.
|
||||
They are pre-configured by the function ``get_params()`` in
|
||||
`conformer_ctc/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/conformer_ctc/train.py>`_
|
||||
|
||||
You don't need to change these pre-configured parameters. If you really need to change
|
||||
them, please modify ``./conformer_ctc/train.py`` directly.
|
||||
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
The training set is perturbed by speed with two factors: 0.9 and 1.1.
|
||||
Each epoch actually processes ``3x150 == 450`` hours of data.
|
||||
|
||||
|
||||
Training logs
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
Training logs and checkpoints are saved in the folder set by ``--exp-dir``
|
||||
(default ``conformer_ctc/exp``). You will find the following files in that directory:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ...
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./conformer_ctc/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd conformer_ctc/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --name "Aishell conformer ctc training with icefall" --description "Training with new LabelSmoothing loss, see https://github.com/k2-fsa/icefall/pull/109"
|
||||
|
||||
It will print something like below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
TensorFlow installation not found - running with reduced feature set.
|
||||
Upload started and will continue reading any new data as it's added to the logdir.
|
||||
|
||||
To stop uploading, press Ctrl-C.
|
||||
|
||||
New experiment created. View your TensorBoard at: https://tensorboard.dev/experiment/engw8KSkTZqS24zBV5dgCg/
|
||||
|
||||
[2021-11-22T11:09:27] Started scanning logdir.
|
||||
[2021-11-22T11:10:14] Total uploaded: 116068 scalars, 0 tensors, 0 binary objects
|
||||
Listening for new data in logdir...
|
||||
|
||||
Note there is a URL in the above output. Click it and you will see
|
||||
the following screenshot:
|
||||
|
||||
.. figure:: images/aishell-conformer-ctc-tensorboard-log.jpg
|
||||
:width: 600
|
||||
:alt: TensorBoard screenshot
|
||||
:align: center
|
||||
:target: https://tensorboard.dev/experiment/WE1DocDqRRCOSAgmGyClhg/
|
||||
|
||||
TensorBoard screenshot.
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
Usage examples
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
The following shows typical use cases:
|
||||
|
||||
**Case 1**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/train.py --max-duration 200
|
||||
|
||||
It uses ``--max-duration`` of 200 to avoid OOM.
|
||||
|
||||
|
||||
**Case 2**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,3"
|
||||
$ ./conformer_ctc/train.py --world-size 2
|
||||
|
||||
It uses GPU 0 and GPU 3 for DDP training.
|
||||
|
||||
**Case 3**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/train.py --num-epochs 10 --start-epoch 3
|
||||
|
||||
It loads checkpoint ``./conformer_ctc/exp/epoch-2.pt`` and starts
|
||||
training from epoch 3. Also, it trains for 10 epochs.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/decode.py --help
|
||||
|
||||
shows the options for decoding.
|
||||
|
||||
The commonly used options are:
|
||||
|
||||
- ``--method``
|
||||
|
||||
This specifies the decoding method.
|
||||
|
||||
The following command uses attention decoder for rescoring:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/decode.py --method attention-decoder --max-duration 30 --nbest-scale 0.5
|
||||
|
||||
- ``--nbest-scale``
|
||||
|
||||
It is used to scale down lattice scores so that there are more unique
|
||||
paths for rescoring.
|
||||
|
||||
- ``--max-duration``
|
||||
|
||||
It has the same meaning as the one during training. A larger
|
||||
value may cause OOM.
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded a pre-trained model to
|
||||
`<https://huggingface.co/pkufool/icefall_asr_aishell_conformer_ctc>`_.
|
||||
|
||||
We describe how to use the pre-trained model to transcribe a sound file or
|
||||
multiple sound files in the following.
|
||||
|
||||
Install kaldifeat
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.
|
||||
|
||||
Please refer to `<https://github.com/csukuangfj/kaldifeat>`_ for installation.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The following commands describe how to download the pre-trained model:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ mkdir tmp
|
||||
$ cd tmp
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/pkufool/icefall_asr_aishell_conformer_ctc
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
In order to use this pre-trained model, your k2 version has to be v1.7 or later.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ tree tmp
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp/
|
||||
`-- icefall_asr_aishell_conformer_ctc
|
||||
|-- README.md
|
||||
|-- data
|
||||
| `-- lang_char
|
||||
| |-- HLG.pt
|
||||
| |-- tokens.txt
|
||||
| `-- words.txt
|
||||
|-- exp
|
||||
| `-- pretrained.pt
|
||||
`-- test_waves
|
||||
|-- BAC009S0764W0121.wav
|
||||
|-- BAC009S0764W0122.wav
|
||||
|-- BAC009S0764W0123.wav
|
||||
`-- trans.txt
|
||||
|
||||
5 directories, 9 files
|
||||
|
||||
**File descriptions**:
|
||||
|
||||
- ``data/lang_char/HLG.pt``
|
||||
|
||||
It is the decoding graph.
|
||||
|
||||
- ``data/lang_char/tokens.txt``
|
||||
|
||||
It contains tokens and their IDs.
|
||||
Provided only for convenience so that you can look up the SOS/EOS ID easily.
|
||||
|
||||
- ``data/lang_char/words.txt``
|
||||
|
||||
It contains words and their IDs.
|
||||
|
||||
- ``exp/pretrained.pt``
|
||||
|
||||
It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from ``epoch-25.pt`` to ``epoch-84.pt``.
|
||||
Note: We have removed optimizer ``state_dict`` to reduce file size.
|
||||
|
||||
- ``test_waves/*.wav``
|
||||
|
||||
It contains some test sound files from Aishell ``test`` dataset.
|
||||
|
||||
- ``test_waves/trans.txt``
|
||||
|
||||
It contains the reference transcripts for the sound files in ``test_waves/``.
|
||||
|
||||
The information of the test sound files is listed below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ soxi tmp/icefall_asr_aishell_conformer_ctc/test_waves/*.wav
|
||||
|
||||
Input File : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
|
||||
File Size : 135k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
|
||||
File Size : 132k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : 'tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
|
||||
File Size : 128k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
Total Duration of 3 files: 00:00:12.32
|
||||
|
||||
Usage
|
||||
~~~~~
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/pretrained.py --help
|
||||
|
||||
displays the help information.
|
||||
|
||||
It supports three decoding methods:
|
||||
|
||||
- CTC decoding
|
||||
- HLG decoding
|
||||
- HLG + attention decoder rescoring
|
||||
|
||||
CTC decoding
|
||||
^^^^^^^^^^^^
|
||||
|
||||
CTC decoding only uses the CTC topology for decoding, without a lexicon and language model.
|
||||
|
||||
The command to run CTC decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/pretrained.py \
|
||||
--checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
|
||||
--tokens-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/tokens.txt \
|
||||
--method ctc-decoding \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||||
|
||||
The output is given below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-18 07:53:41,707 INFO [pretrained.py:229] {'sample_rate': 16000, 'subsampling_factor': 4, 'feature_dim': 80, 'nhead': 4, 'attention_dim': 512, 'num_decoder_layers': 6, 'vgg_frontend': False, 'use_feat_batchnorm': True, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'env_info': {'k2-version': '1.9', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'f2fd997f752ed11bbef4c306652c433e83f9cf12', 'k2-git-date': 'Sun Sep 19 09:41:46 2021', 'lhotse-version': '0.11.0.dev+git.33cfe45.clean', 'torch-cuda-available': True, 'torch-cuda-version': '10.1', 'python-version': '3.8', 'icefall-git-branch': 'aishell', 'icefall-git-sha1': 'd57a873-dirty', 'icefall-git-date': 'Wed Nov 17 19:53:25 2021', 'icefall-path': '/ceph-hw/kangwei/code/icefall_aishell3', 'k2-path': '/ceph-hw/kangwei/code/k2_release/k2/k2/python/k2/__init__.py', 'lhotse-path': '/ceph-hw/kangwei/code/lhotse/lhotse/__init__.py'}, 'checkpoint': './tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt', 'tokens_file': './tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/tokens.txt', 'words_file': None, 'HLG': None, 'method': 'ctc-decoding', 'num_paths': 100, 'ngram_lm_scale': 0.3, 'attention_decoder_scale': 0.9, 'nbest_scale': 0.5, 'sos_id': 1, 'eos_id': 1, 'num_classes': 4336, 'sound_files': ['./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav']}
|
||||
2021-11-18 07:53:41,708 INFO [pretrained.py:240] device: cuda:0
|
||||
2021-11-18 07:53:41,708 INFO [pretrained.py:242] Creating model
|
||||
2021-11-18 07:53:51,131 INFO [pretrained.py:259] Constructing Fbank computer
|
||||
2021-11-18 07:53:51,134 INFO [pretrained.py:269] Reading sound files: ['./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav']
|
||||
2021-11-18 07:53:51,138 INFO [pretrained.py:275] Decoding started
|
||||
2021-11-18 07:53:51,241 INFO [pretrained.py:293] Use CTC decoding
|
||||
2021-11-18 07:53:51,704 INFO [pretrained.py:369]
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav:
|
||||
甚 至 出 现 交 易 几 乎 停 止 的 情 况
|
||||
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav:
|
||||
一 二 线 城 市 虽 然 也 处 于 调 整 中
|
||||
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav:
|
||||
但 因 为 聚 集 了 过 多 公 共 资 源
|
||||
|
||||
|
||||
2021-11-18 07:53:51,704 INFO [pretrained.py:371] Decoding Done
|
||||
|
||||
|
||||
HLG decoding
|
||||
^^^^^^^^^^^^
|
||||
|
||||
HLG decoding uses the best path of the decoding lattice as the decoding result.
|
||||
|
||||
The command to run HLG decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/pretrained.py \
|
||||
--checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
|
||||
--HLG ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
|
||||
--method 1best \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||||
|
||||
The output is given below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-18 07:37:38,683 INFO [pretrained.py:229] {'sample_rate': 16000, 'subsampling_factor': 4, 'feature_dim': 80, 'nhead': 4, 'attention_dim': 512, 'num_decoder_layers': 6, 'vgg_frontend': False, 'use_feat_batchnorm': True, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'env_info': {'k2-version': '1.9', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'f2fd997f752ed11bbef4c306652c433e83f9cf12', 'k2-git-date': 'Sun Sep 19 09:41:46 2021', 'lhotse-version': '0.11.0.dev+git.33cfe45.clean', 'torch-cuda-available': True, 'torch-cuda-version': '10.1', 'python-version': '3.8', 'icefall-git-branch': 'aishell', 'icefall-git-sha1': 'd57a873-clean', 'icefall-git-date': 'Wed Nov 17 19:53:25 2021', 'icefall-path': '/ceph-hw/kangwei/code/icefall_aishell3', 'k2-path': '/ceph-hw/kangwei/code/k2_release/k2/k2/python/k2/__init__.py', 'lhotse-path': '/ceph-hw/kangwei/code/lhotse/lhotse/__init__.py'}, 'checkpoint': './tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt', 'tokens_file': None, 'words_file': './tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt', 'HLG': './tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt', 'method': '1best', 'num_paths': 100, 'ngram_lm_scale': 0.3, 'attention_decoder_scale': 0.9, 'nbest_scale': 0.5, 'sos_id': 1, 'eos_id': 1, 'num_classes': 4336, 'sound_files': ['./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav']}
|
||||
2021-11-18 07:37:38,684 INFO [pretrained.py:240] device: cuda:0
|
||||
2021-11-18 07:37:38,684 INFO [pretrained.py:242] Creating model
|
||||
2021-11-18 07:37:47,651 INFO [pretrained.py:259] Constructing Fbank computer
|
||||
2021-11-18 07:37:47,654 INFO [pretrained.py:269] Reading sound files: ['./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav']
|
||||
2021-11-18 07:37:47,659 INFO [pretrained.py:275] Decoding started
|
||||
2021-11-18 07:37:47,752 INFO [pretrained.py:321] Loading HLG from ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt
|
||||
2021-11-18 07:37:51,887 INFO [pretrained.py:340] Use HLG decoding
|
||||
2021-11-18 07:37:52,102 INFO [pretrained.py:370]
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav:
|
||||
甚至 出现 交易 几乎 停止 的 情况
|
||||
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav:
|
||||
一二 线 城市 虽然 也 处于 调整 中
|
||||
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav:
|
||||
但 因为 聚集 了 过多 公共 资源
|
||||
|
||||
|
||||
2021-11-18 07:37:52,102 INFO [pretrained.py:372] Decoding Done
|
||||
|
||||
|
||||
HLG decoding + attention decoder rescoring
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
It extracts n paths from the lattice, rescores the extracted paths with
|
||||
an attention decoder. The path with the highest score is the decoding result.
|
||||
|
||||
The command to run HLG decoding + attention decoder rescoring is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./conformer_ctc/pretrained.py \
|
||||
--checkpoint ./tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
|
||||
--HLG ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
|
||||
--method attention-decoder \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||||
|
||||
The output is below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-18 07:42:05,965 INFO [pretrained.py:229] {'sample_rate': 16000, 'subsampling_factor': 4, 'feature_dim': 80, 'nhead': 4, 'attention_dim': 512, 'num_decoder_layers': 6, 'vgg_frontend': False, 'use_feat_batchnorm': True, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'env_info': {'k2-version': '1.9', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'f2fd997f752ed11bbef4c306652c433e83f9cf12', 'k2-git-date': 'Sun Sep 19 09:41:46 2021', 'lhotse-version': '0.11.0.dev+git.33cfe45.clean', 'torch-cuda-available': True, 'torch-cuda-version': '10.1', 'python-version': '3.8', 'icefall-git-branch': 'aishell', 'icefall-git-sha1': 'd57a873-dirty', 'icefall-git-date': 'Wed Nov 17 19:53:25 2021', 'icefall-path': '/ceph-hw/kangwei/code/icefall_aishell3', 'k2-path': '/ceph-hw/kangwei/code/k2_release/k2/k2/python/k2/__init__.py', 'lhotse-path': '/ceph-hw/kangwei/code/lhotse/lhotse/__init__.py'}, 'checkpoint': './tmp/icefall_asr_aishell_conformer_ctc/exp/pretrained.pt', 'tokens_file': None, 'words_file': './tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt', 'HLG': './tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt', 'method': 'attention-decoder', 'num_paths': 100, 'ngram_lm_scale': 0.3, 'attention_decoder_scale': 0.9, 'nbest_scale': 0.5, 'sos_id': 1, 'eos_id': 1, 'num_classes': 4336, 'sound_files': ['./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav']}
|
||||
2021-11-18 07:42:05,966 INFO [pretrained.py:240] device: cuda:0
|
||||
2021-11-18 07:42:05,966 INFO [pretrained.py:242] Creating model
|
||||
2021-11-18 07:42:16,821 INFO [pretrained.py:259] Constructing Fbank computer
|
||||
2021-11-18 07:42:16,822 INFO [pretrained.py:269] Reading sound files: ['./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav']
|
||||
2021-11-18 07:42:16,826 INFO [pretrained.py:275] Decoding started
|
||||
2021-11-18 07:42:16,916 INFO [pretrained.py:321] Loading HLG from ./tmp/icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt
|
||||
2021-11-18 07:42:21,115 INFO [pretrained.py:345] Use HLG + attention decoder rescoring
|
||||
2021-11-18 07:42:21,888 INFO [pretrained.py:370]
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav:
|
||||
甚至 出现 交易 几乎 停止 的 情况
|
||||
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav:
|
||||
一二 线 城市 虽然 也 处于 调整 中
|
||||
|
||||
./tmp/icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav:
|
||||
但 因为 聚集 了 过多 公共 资源
|
||||
|
||||
|
||||
2021-11-18 07:42:21,889 INFO [pretrained.py:372] Decoding Done
|
||||
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
We do provide a colab notebook for this recipe showing how to use a pre-trained model.
|
||||
|
||||
|aishell asr conformer ctc colab notebook|
|
||||
|
||||
.. |aishell asr conformer ctc colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/1WnG17io5HEZ0Gn_cnh_VzK5QYOoiiklC
|
||||
|
||||
.. HINT::
|
||||
|
||||
Due to limited memory provided by Colab, you have to upgrade to Colab Pro to
|
||||
run ``HLG decoding + attention decoder rescoring``.
|
||||
Otherwise, you can only run ``HLG decoding`` with Colab.
|
||||
|
||||
**Congratulations!** You have finished the aishell ASR recipe with
|
||||
conformer CTC models in ``icefall``.
|
||||
|
||||
|
||||
If you want to deploy your trained model in C++, please read the following section.
|
||||
|
||||
Deployment with C++
|
||||
-------------------
|
||||
|
||||
This section describes how to deploy the pre-trained model in C++, without
|
||||
Python dependencies.
|
||||
|
||||
.. HINT::
|
||||
|
||||
At present, it does NOT support streaming decoding.
|
||||
|
||||
First, let us compile k2 from source:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd $HOME
|
||||
$ git clone https://github.com/k2-fsa/k2
|
||||
$ cd k2
|
||||
$ git checkout v2.0-pre
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to switch to the branch ``v2.0-pre``!
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ mkdir build-release
|
||||
$ cd build-release
|
||||
$ cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
$ make -j hlg_decode
|
||||
|
||||
# You will find the binary ./bin/hlg_decode
|
||||
|
||||
Now you are ready to go!
|
||||
|
||||
Assume you have run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd k2/build-release
|
||||
$ ln -s /path/to/icefall-asr-aishell-conformer-ctc ./
|
||||
|
||||
To view the usage of ``./bin/hlg_decode``, run:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ ./bin/hlg_decode
|
||||
|
||||
It will show you the following message:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
Please provide --nn_model
|
||||
|
||||
This file implements decoding with an HLG decoding graph.
|
||||
|
||||
Usage:
|
||||
./bin/hlg_decode \
|
||||
--use_gpu true \
|
||||
--nn_model <path to torch scripted pt file> \
|
||||
--hlg <path to HLG.pt> \
|
||||
--word_table <path to words.txt> \
|
||||
<path to foo.wav> \
|
||||
<path to bar.wav> \
|
||||
<more waves if any>
|
||||
|
||||
To see all possible options, use
|
||||
./bin/hlg_decode --help
|
||||
|
||||
Caution:
|
||||
- Only sound files (*.wav) with single channel are supported.
|
||||
- It assumes the model is conformer_ctc/transformer.py from icefall.
|
||||
If you use a different model, you have to change the code
|
||||
related to `model.forward` in this file.
|
||||
|
||||
|
||||
HLG decoding
|
||||
^^^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./bin/hlg_decode \
|
||||
--use_gpu true \
|
||||
--nn_model icefall_asr_aishell_conformer_ctc/exp/cpu_jit.pt \
|
||||
--hlg icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt \
|
||||
--word_table icefall_asr_aishell_conformer_ctc/data/lang_char/words.txt \
|
||||
icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav \
|
||||
icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav \
|
||||
icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||||
|
||||
The output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-18 14:48:20.89 [I] k2/torch/bin/hlg_decode.cu:115:int main(int, char**) Device: cpu
|
||||
2021-11-18 14:48:20.89 [I] k2/torch/bin/hlg_decode.cu:124:int main(int, char**) Load wave files
|
||||
2021-11-18 14:48:20.97 [I] k2/torch/bin/hlg_decode.cu:131:int main(int, char**) Build Fbank computer
|
||||
2021-11-18 14:48:20.98 [I] k2/torch/bin/hlg_decode.cu:142:int main(int, char**) Compute features
|
||||
2021-11-18 14:48:20.115 [I] k2/torch/bin/hlg_decode.cu:150:int main(int, char**) Load neural network model
|
||||
2021-11-18 14:48:20.693 [I] k2/torch/bin/hlg_decode.cu:165:int main(int, char**) Compute nnet_output
|
||||
2021-11-18 14:48:23.182 [I] k2/torch/bin/hlg_decode.cu:180:int main(int, char**) Load icefall_asr_aishell_conformer_ctc/data/lang_char/HLG.pt
|
||||
2021-11-18 14:48:33.489 [I] k2/torch/bin/hlg_decode.cu:185:int main(int, char**) Decoding
|
||||
2021-11-18 14:48:45.217 [I] k2/torch/bin/hlg_decode.cu:216:int main(int, char**)
|
||||
Decoding result:
|
||||
|
||||
icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0121.wav
|
||||
甚至 出现 交易 几乎 停止 的 情况
|
||||
|
||||
icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0122.wav
|
||||
一二 线 城市 虽然 也 处于 调整 中
|
||||
|
||||
icefall_asr_aishell_conformer_ctc/test_waves/BAC009S0764W0123.wav
|
||||
但 因为 聚集 了 过多 公共 资源
|
||||
|
||||
There is a Colab notebook showing you how to run a torch scripted model in C++.
|
||||
Please see |aishell asr conformer ctc torch script colab notebook|
|
||||
|
||||
.. |aishell asr conformer ctc torch script colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/1Vh7RER7saTW01DtNbvr7CY7ovNZgmfWz?usp=sharing
|
22
_sources/recipes/aishell/index.rst.txt
Normal file
@ -0,0 +1,22 @@
|
||||
aishell
|
||||
=======
|
||||
|
||||
Aishell is an open-source Chinese Mandarin speech corpus published by Beijing
|
||||
Shell Shell Technology Co., Ltd.
|
||||
|
||||
400 people from different accent areas in China are invited to participate in
|
||||
the recording, which is conducted in a quiet indoor environment using high
|
||||
fidelity microphone and downsampled to 16kHz. The manual transcription accuracy
|
||||
is above 95%, through professional speech annotation and strict quality
|
||||
inspection. The data is free for academic use. We hope to provide moderate
|
||||
amount of data for new researchers in the field of speech recognition.
|
||||
|
||||
It can be downloaded from `<https://www.openslr.org/33/>`_
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
tdnn_lstm_ctc
|
||||
conformer_ctc
|
||||
stateless_transducer
|
||||
|
714
_sources/recipes/aishell/stateless_transducer.rst.txt
Normal file
@ -0,0 +1,714 @@
|
||||
Stateless Transducer
|
||||
====================
|
||||
|
||||
This tutorial shows you how to do transducer training in ``icefall``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
Instead of using RNN-T or RNN transducer, we only use transducer
|
||||
here. As you will see, there are no RNNs in the model.
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have setup
|
||||
the environment for ``icefall``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
We recommend you to use a GPU or several GPUs to run this recipe.
|
||||
|
||||
In this tutorial, you will learn:
|
||||
|
||||
- (1) What does the transducer model look like
|
||||
- (2) How to prepare data for training and decoding
|
||||
- (3) How to start the training, either with a single GPU or with multiple GPUs
|
||||
- (4) How to do decoding after training, with greedy search, beam search, and **modified beam search**
|
||||
- (5) How to use a pre-trained model provided by us to transcribe sound files
|
||||
|
||||
|
||||
The Model
|
||||
---------
|
||||
|
||||
The transducer model consists of 3 parts:
|
||||
|
||||
- **Encoder**: It is a conformer encoder with the following parameters
|
||||
|
||||
- Number of heads: 8
|
||||
- Attention dim: 512
|
||||
- Number of layers: 12
|
||||
- Feedforward dim: 2048
|
||||
|
||||
- **Decoder**: We use a stateless model consisting of:
|
||||
|
||||
- An embedding layer with embedding dim 512
|
||||
- A Conv1d layer with a default kernel size 2 (i.e. it sees 2
|
||||
symbols of left-context by default)
|
||||
|
||||
- **Joiner**: It consists of a ``nn.tanh()`` and a ``nn.Linear()``.
|
||||
|
||||
.. Caution::
|
||||
|
||||
The decoder is stateless and very simple. It is borrowed from
|
||||
`<https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=9054419>`_
|
||||
(Rnn-Transducer with Stateless Prediction Network)
|
||||
|
||||
We make one modification to it: Place a Conv1d layer right after
|
||||
the embedding layer.
|
||||
|
||||
When using Chinese characters as modelling unit, whose vocabulary size
|
||||
is 4336 in this specific dataset,
|
||||
the number of parameters of the model is ``87939824``, i.e., about ``88 M``.
|
||||
|
||||
The Loss
|
||||
--------
|
||||
|
||||
We are using `<https://github.com/csukuangfj/optimized_transducer>`_
|
||||
to compute the transducer loss, which removes extra paddings
|
||||
in loss computation to save memory.
|
||||
|
||||
.. Hint::
|
||||
|
||||
``optimized_transducer`` implements the techniques proposed
|
||||
in `Improving RNN Transducer Modeling for End-to-End Speech Recognition <https://arxiv.org/abs/1909.12415>`_ to save memory.
|
||||
|
||||
Furthermore, it supports ``modified transducer``, limiting the maximum
|
||||
number of symbols that can be emitted per frame to 1, which simplifies
|
||||
the decoding process significantly. Also, the experiment results
|
||||
show that it does not degrade the performance.
|
||||
|
||||
See `<https://github.com/csukuangfj/optimized_transducer#modified-transducer>`_
|
||||
for what exactly modified transducer is.
|
||||
|
||||
`<https://github.com/csukuangfj/transducer-loss-benchmarking>`_ shows that
|
||||
in the unpruned case ``optimized_transducer`` has the advantage of minimizing
|
||||
memory usage.
|
||||
|
||||
.. todo::
|
||||
|
||||
Add tutorial about ``pruned_transducer_stateless`` that uses k2
|
||||
pruned transducer loss.
|
||||
|
||||
.. hint::
|
||||
|
||||
You can use::
|
||||
|
||||
pip install optimized_transducer
|
||||
|
||||
to install ``optimized_transducer``. Refer to
|
||||
`<https://github.com/csukuangfj/optimized_transducer>`_ for other
|
||||
alternatives.
|
||||
|
||||
Data Preparation
|
||||
----------------
|
||||
|
||||
To prepare the data for training, please use the following commands:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd egs/aishell/ASR
|
||||
./prepare.sh --stop-stage 4
|
||||
./prepare.sh --stage 6 --stop-stage 6
|
||||
|
||||
.. note::
|
||||
|
||||
You can use ``./prepare.sh``, though it will generate FSTs that
|
||||
are not used in transducer training.
|
||||
|
||||
When you finish running the script, you will get the following two folders:
|
||||
|
||||
- ``data/fbank``: It saves the pre-computed features
|
||||
- ``data/lang_char``: It contains tokens that will be used in the training
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd egs/aishell/ASR
|
||||
./transducer_stateless_modified/train.py --help
|
||||
|
||||
shows you the training options that can be passed from the commandline.
|
||||
The following options are used quite often:
|
||||
|
||||
- ``--exp-dir``
|
||||
|
||||
The experiment folder to save logs and model checkpoints,
|
||||
defaults to ``./transducer_stateless_modified/exp``.
|
||||
|
||||
- ``--num-epochs``
|
||||
|
||||
It is the number of epochs to train. For instance,
|
||||
``./transducer_stateless_modified/train.py --num-epochs 30`` trains for 30
|
||||
epochs and generates ``epoch-0.pt``, ``epoch-1.pt``, ..., ``epoch-29.pt``
|
||||
in the folder set by ``--exp-dir``.
|
||||
|
||||
- ``--start-epoch``
|
||||
|
||||
It's used to resume training.
|
||||
``./transducer_stateless_modified/train.py --start-epoch 10`` loads the
|
||||
checkpoint from ``exp_dir/epoch-9.pt`` and starts
|
||||
training from epoch 10, based on the state from epoch 9.
|
||||
|
||||
- ``--world-size``
|
||||
|
||||
It is used for single-machine multi-GPU DDP training.
|
||||
|
||||
- (a) If it is 1, then no DDP training is used.
|
||||
|
||||
- (b) If it is 2, then GPU 0 and GPU 1 are used for DDP training.
|
||||
|
||||
The following shows some use cases with it.
|
||||
|
||||
**Use case 1**: You have 4 GPUs, but you only want to use GPU 0 and
|
||||
GPU 2 for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,2"
|
||||
$ ./transducer_stateless_modified/train.py --world-size 2
|
||||
|
||||
**Use case 2**: You have 4 GPUs and you want to use all of them
|
||||
for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/train.py --world-size 4
|
||||
|
||||
**Use case 3**: You have 4 GPUs but you only want to use GPU 3
|
||||
for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="3"
|
||||
$ ./transducer_stateless_modified/train.py --world-size 1
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
Only single-machine multi-GPU DDP training is implemented at present.
|
||||
There is an on-going PR `<https://github.com/k2-fsa/icefall/pull/63>`_
|
||||
that adds support for multi-machine multi-GPU DDP training.
|
||||
|
||||
- ``--max-duration``
|
||||
|
||||
It specifies the number of seconds over all utterances in a
|
||||
batch **before padding**.
|
||||
If you encounter CUDA OOM, please reduce it. For instance, if
|
||||
you are using a V100 NVIDIA GPU with 32 GB RAM, we recommend you
|
||||
to set it to ``300`` when the vocabulary size is 500.
|
||||
|
||||
.. HINT::
|
||||
|
||||
Due to padding, the number of seconds of all utterances in a
|
||||
batch will usually be larger than ``--max-duration``.
|
||||
|
||||
A larger value for ``--max-duration`` may cause OOM during training,
|
||||
while a smaller value may increase the training time. You have to
|
||||
tune it.
|
||||
|
||||
- ``--lr-factor``
|
||||
|
||||
It controls the learning rate. If you use a single GPU for training, you
|
||||
may want to use a small value for it. If you use multiple GPUs for training,
|
||||
you may increase it.
|
||||
|
||||
- ``--context-size``
|
||||
|
||||
It specifies the kernel size in the decoder. The default value 2 means it
|
||||
functions as a tri-gram LM.
|
||||
|
||||
- ``--modified-transducer-prob``
|
||||
|
||||
It specifies the probability to use modified transducer loss.
|
||||
If it is 0, then no modified transducer is used; if it is 1,
|
||||
then it uses modified transducer loss for all batches. If it is
|
||||
``p``, it applies modified transducer with probability ``p``.
|
||||
|
||||
There are some training options, e.g.,
|
||||
number of warmup steps,
|
||||
that are not passed from the commandline.
|
||||
They are pre-configured by the function ``get_params()`` in
|
||||
`transducer_stateless_modified/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/transducer_stateless_modified/train.py#L162>`_
|
||||
|
||||
If you need to change them, please modify ``./transducer_stateless_modified/train.py`` directly.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
The training set is perturbed by speed with two factors: 0.9 and 1.1.
|
||||
Each epoch actually processes ``3x150 == 450`` hours of data.
|
||||
|
||||
Training logs
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
Training logs and checkpoints are saved in the folder set by ``--exp-dir``
|
||||
(defaults to ``transducer_stateless_modified/exp``). You will find the following files in that directory:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ...
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./transducer_stateless_modified/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd transducer_stateless_modified/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --name "Aishell transducer training with icefall" --description "Training modified transducer, see https://github.com/k2-fsa/icefall/pull/219"
|
||||
|
||||
It will print something like below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
TensorFlow installation not found - running with reduced feature set.
|
||||
Upload started and will continue reading any new data as it's added to the logdir.
|
||||
|
||||
To stop uploading, press Ctrl-C.
|
||||
|
||||
New experiment created. View your TensorBoard at: https://tensorboard.dev/experiment/laGZ6HrcQxOigbFD5E0Y3Q/
|
||||
|
||||
[2022-03-03T14:29:45] Started scanning logdir.
|
||||
[2022-03-03T14:29:48] Total uploaded: 8477 scalars, 0 tensors, 0 binary objects
|
||||
Listening for new data in logdir...
|
||||
|
||||
Note there is a `URL <https://tensorboard.dev/experiment/laGZ6HrcQxOigbFD5E0Y3Q/>`_ in the
|
||||
above output, click it and you will see the following screenshot:
|
||||
|
||||
.. figure:: images/aishell-transducer_stateless_modified-tensorboard-log.png
|
||||
:width: 600
|
||||
:alt: TensorBoard screenshot
|
||||
:align: center
|
||||
:target: https://tensorboard.dev/experiment/laGZ6HrcQxOigbFD5E0Y3Q
|
||||
|
||||
TensorBoard screenshot.
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
Usage examples
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
The following shows typical use cases:
|
||||
|
||||
**Case 1**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/train.py --max-duration 250
|
||||
|
||||
It uses ``--max-duration`` of 250 to avoid OOM.
|
||||
|
||||
|
||||
**Case 2**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,3"
|
||||
$ ./transducer_stateless_modified/train.py --world-size 2
|
||||
|
||||
It uses GPU 0 and GPU 3 for DDP training.
|
||||
|
||||
**Case 3**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/train.py --num-epochs 10 --start-epoch 3
|
||||
|
||||
It loads checkpoint ``./transducer_stateless_modified/exp/epoch-2.pt`` and starts
|
||||
training from epoch 3. Also, it trains for 10 epochs.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/decode.py --help
|
||||
|
||||
shows the options for decoding.
|
||||
|
||||
The commonly used options are:
|
||||
|
||||
- ``--method``
|
||||
|
||||
This specifies the decoding method. Currently, it supports:
|
||||
|
||||
- **greedy_search**. You can provide the commandline option ``--max-sym-per-frame``
|
||||
to limit the maximum number of symbols that can be emitted per frame.
|
||||
|
||||
- **beam_search**. You can provide the commandline option ``--beam-size``.
|
||||
|
||||
- **modified_beam_search**. You can also provide the commandline option ``--beam-size``.
|
||||
To use this method, we assume that you have trained your model with modified transducer,
|
||||
i.e., used the option ``--modified-transducer-prob`` in the training.
|
||||
|
||||
The following command uses greedy search for decoding
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/decode.py \
|
||||
--epoch 64 \
|
||||
--avg 33 \
|
||||
--exp-dir ./transducer_stateless_modified/exp \
|
||||
--max-duration 100 \
|
||||
--decoding-method greedy_search \
|
||||
--max-sym-per-frame 1
|
||||
|
||||
The following command uses beam search for decoding
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/decode.py \
|
||||
--epoch 64 \
|
||||
--avg 33 \
|
||||
--exp-dir ./transducer_stateless_modified/exp \
|
||||
--max-duration 100 \
|
||||
--decoding-method beam_search \
|
||||
--beam-size 4
|
||||
|
||||
The following command uses ``modified`` beam search for decoding
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/decode.py \
|
||||
--epoch 64 \
|
||||
--avg 33 \
|
||||
--exp-dir ./transducer_stateless_modified/exp \
|
||||
--max-duration 100 \
|
||||
--decoding-method modified_beam_search \
|
||||
--beam-size 4
|
||||
|
||||
- ``--max-duration``
|
||||
|
||||
It has the same meaning as the one used in training. A larger
|
||||
value may cause OOM.
|
||||
|
||||
- ``--epoch``
|
||||
|
||||
It specifies the epoch of the checkpoint that should be used for decoding.
|
||||
|
||||
- ``--avg``
|
||||
|
||||
It specifies the number of models to average. For instance, if it is 3 and if
|
||||
``--epoch=10``, then it averages the checkpoints ``epoch-8.pt``, ``epoch-9.pt``,
|
||||
and ``epoch-10.pt`` and the averaged checkpoint is used for decoding.
|
||||
|
||||
After decoding, you can find the decoding logs and results in `exp_dir/log/<decoding_method>`, e.g.,
|
||||
``exp_dir/log/greedy_search``.
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded a pre-trained model to
|
||||
`<https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01>`_
|
||||
|
||||
We describe how to use the pre-trained model to transcribe a sound file or
|
||||
multiple sound files in the following.
|
||||
|
||||
Install kaldifeat
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.
|
||||
|
||||
Please refer to `<https://github.com/csukuangfj/kaldifeat>`_ for installation.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The following commands describe how to download the pre-trained model:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ mkdir tmp
|
||||
$ cd tmp
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01
|
||||
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ tree tmp/icefall-aishell-transducer-stateless-modified-2022-03-01
|
||||
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/
|
||||
|-- README.md
|
||||
|-- data
|
||||
| `-- lang_char
|
||||
| |-- L.pt
|
||||
| |-- lexicon.txt
|
||||
| |-- tokens.txt
|
||||
| `-- words.txt
|
||||
|-- exp
|
||||
| `-- pretrained.pt
|
||||
|-- log
|
||||
| |-- errs-test-beam_4-epoch-64-avg-33-beam-4.txt
|
||||
| |-- errs-test-greedy_search-epoch-64-avg-33-context-2-max-sym-per-frame-1.txt
|
||||
| |-- log-decode-epoch-64-avg-33-beam-4-2022-03-02-12-05-03
|
||||
| |-- log-decode-epoch-64-avg-33-context-2-max-sym-per-frame-1-2022-02-28-18-13-07
|
||||
| |-- recogs-test-beam_4-epoch-64-avg-33-beam-4.txt
|
||||
| `-- recogs-test-greedy_search-epoch-64-avg-33-context-2-max-sym-per-frame-1.txt
|
||||
`-- test_wavs
|
||||
|-- BAC009S0764W0121.wav
|
||||
|-- BAC009S0764W0122.wav
|
||||
|-- BAC009S0764W0123.wav
|
||||
`-- transcript.txt
|
||||
|
||||
5 directories, 16 files
|
||||
|
||||
|
||||
**File descriptions**:
|
||||
|
||||
- ``data/lang_char``
|
||||
|
||||
It contains language related files. You can find the vocabulary size in ``tokens.txt``.
|
||||
|
||||
- ``exp/pretrained.pt``
|
||||
|
||||
It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from ``epoch-32.pt`` to ``epoch-64.pt``.
|
||||
Note: We have removed optimizer ``state_dict`` to reduce file size.
|
||||
|
||||
- ``log``
|
||||
|
||||
It contains decoding logs and decoded results.
|
||||
|
||||
- ``test_wavs``
|
||||
|
||||
It contains some test sound files from Aishell ``test`` dataset.
|
||||
|
||||
The information of the test sound files is listed below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ soxi tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/*.wav
|
||||
|
||||
Input File : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
|
||||
File Size : 135k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
|
||||
File Size : 132k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : 'tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
|
||||
File Size : 128k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
Total Duration of 3 files: 00:00:12.32
|
||||
|
||||
Usage
|
||||
~~~~~
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/pretrained.py --help
|
||||
|
||||
displays the help information.
|
||||
|
||||
It supports three decoding methods:
|
||||
|
||||
- greedy search
|
||||
- beam search
|
||||
- modified beam search
|
||||
|
||||
.. note::
|
||||
|
||||
In modified beam search, it limits the maximum number of symbols that can be
|
||||
emitted per frame to 1. To use this method, you have to ensure that your model
|
||||
has been trained with the option ``--modified-transducer-prob``. Otherwise,
|
||||
it may give you poor results.
|
||||
|
||||
Greedy search
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
The command to run greedy search is given below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./transducer_stateless_modified/pretrained.py \
|
||||
--checkpoint ./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/exp/pretrained.pt \
|
||||
--lang-dir ./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char \
|
||||
--method greedy_search \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav
|
||||
|
||||
The output is as follows:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2022-03-03 15:35:26,531 INFO [pretrained.py:239] device: cuda:0
|
||||
2022-03-03 15:35:26,994 INFO [lexicon.py:176] Loading pre-compiled tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char/Linv.pt
|
||||
2022-03-03 15:35:27,027 INFO [pretrained.py:246] {'feature_dim': 80, 'encoder_out_dim': 512, 'subsampling_factor': 4, 'attention_dim': 512, 'nhead': 8, 'dim_feedforward': 2048, 'num_encoder_layers': 12, 'vgg_frontend': False, 'env_info': {'k2-version': '1.13', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'f4fefe4882bc0ae59af951da3f47335d5495ef71', 'k2-git-date': 'Thu Feb 10 15:16:02 2022', 'lhotse-version': '1.0.0.dev+missing.version.file', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '50d2281-clean', 'icefall-git-date': 'Wed Mar 2 16:02:38 2022', 'icefall-path': '/ceph-fj/fangjun/open-source-2/icefall-aishell', 'k2-path': '/ceph-fj/fangjun/open-source-2/k2-multi-datasets/k2/python/k2/__init__.py', 'lhotse-path': '/ceph-fj/fangjun/open-source-2/lhotse-aishell/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0815224919-75d558775b-mmnv8', 'IP address': '10.177.72.138'}, 'sample_rate': 16000, 'checkpoint': './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/exp/pretrained.pt', 'lang_dir': PosixPath('tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char'), 'method': 'greedy_search', 'sound_files': ['./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav'], 'beam_size': 4, 'context_size': 2, 'max_sym_per_frame': 3, 'blank_id': 0, 'vocab_size': 4336}
|
||||
2022-03-03 15:35:27,027 INFO [pretrained.py:248] About to create model
|
||||
2022-03-03 15:35:36,878 INFO [pretrained.py:257] Constructing Fbank computer
|
||||
2022-03-03 15:35:36,880 INFO [pretrained.py:267] Reading sound files: ['./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav']
|
||||
2022-03-03 15:35:36,891 INFO [pretrained.py:273] Decoding started
|
||||
/ceph-fj/fangjun/open-source-2/icefall-aishell/egs/aishell/ASR/transducer_stateless_modified/conformer.py:113: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
||||
lengths = ((x_lens - 1) // 2 - 1) // 2
|
||||
2022-03-03 15:35:37,163 INFO [pretrained.py:320]
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav:
|
||||
甚 至 出 现 交 易 几 乎 停 滞 的 情 况
|
||||
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav:
|
||||
一 二 线 城 市 虽 然 也 处 于 调 整 中
|
||||
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav:
|
||||
但 因 为 聚 集 了 过 多 公 共 资 源
|
||||
|
||||
2022-03-03 15:35:37,163 INFO [pretrained.py:322] Decoding Done
|
||||
|
||||
Beam search
|
||||
^^^^^^^^^^^
|
||||
|
||||
The command to run beam search is given below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
|
||||
$ ./transducer_stateless_modified/pretrained.py \
|
||||
--checkpoint ./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/exp/pretrained.pt \
|
||||
--lang-dir ./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char \
|
||||
--method beam_search \
|
||||
--beam-size 4 \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav
|
||||
|
||||
The output is as follows:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2022-03-03 15:39:09,285 INFO [pretrained.py:239] device: cuda:0
|
||||
2022-03-03 15:39:09,708 INFO [lexicon.py:176] Loading pre-compiled tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char/Linv.pt
|
||||
2022-03-03 15:39:09,759 INFO [pretrained.py:246] {'feature_dim': 80, 'encoder_out_dim': 512, 'subsampling_factor': 4, 'attention_dim': 512, 'nhead': 8, 'dim_feedforward': 2048, 'num_encoder_layers': 12, 'vgg_frontend': False, 'env_info': {'k2-version': '1.13', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'f4fefe4882bc0ae59af951da3f47335d5495ef71', 'k2-git-date': 'Thu Feb 10 15:16:02 2022', 'lhotse-version': '1.0.0.dev+missing.version.file', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '50d2281-clean', 'icefall-git-date': 'Wed Mar 2 16:02:38 2022', 'icefall-path': '/ceph-fj/fangjun/open-source-2/icefall-aishell', 'k2-path': '/ceph-fj/fangjun/open-source-2/k2-multi-datasets/k2/python/k2/__init__.py', 'lhotse-path': '/ceph-fj/fangjun/open-source-2/lhotse-aishell/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0815224919-75d558775b-mmnv8', 'IP address': '10.177.72.138'}, 'sample_rate': 16000, 'checkpoint': './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/exp/pretrained.pt', 'lang_dir': PosixPath('tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char'), 'method': 'beam_search', 'sound_files': ['./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav'], 'beam_size': 4, 'context_size': 2, 'max_sym_per_frame': 3, 'blank_id': 0, 'vocab_size': 4336}
|
||||
2022-03-03 15:39:09,760 INFO [pretrained.py:248] About to create model
|
||||
2022-03-03 15:39:18,919 INFO [pretrained.py:257] Constructing Fbank computer
|
||||
2022-03-03 15:39:18,922 INFO [pretrained.py:267] Reading sound files: ['./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav']
|
||||
2022-03-03 15:39:18,929 INFO [pretrained.py:273] Decoding started
|
||||
/ceph-fj/fangjun/open-source-2/icefall-aishell/egs/aishell/ASR/transducer_stateless_modified/conformer.py:113: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
||||
lengths = ((x_lens - 1) // 2 - 1) // 2
|
||||
2022-03-03 15:39:21,046 INFO [pretrained.py:320]
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav:
|
||||
甚 至 出 现 交 易 几 乎 停 滞 的 情 况
|
||||
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav:
|
||||
一 二 线 城 市 虽 然 也 处 于 调 整 中
|
||||
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav:
|
||||
但 因 为 聚 集 了 过 多 公 共 资 源
|
||||
|
||||
2022-03-03 15:39:21,047 INFO [pretrained.py:322] Decoding Done
|
||||
|
||||
Modified Beam search
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The command to run modified beam search is given below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
|
||||
$ ./transducer_stateless_modified/pretrained.py \
|
||||
--checkpoint ./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/exp/pretrained.pt \
|
||||
--lang-dir ./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char \
|
||||
--method modified_beam_search \
|
||||
--beam-size 4 \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav \
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav
|
||||
|
||||
The output is as follows:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2022-03-03 15:41:23,319 INFO [pretrained.py:239] device: cuda:0
|
||||
2022-03-03 15:41:23,798 INFO [lexicon.py:176] Loading pre-compiled tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char/Linv.pt
|
||||
2022-03-03 15:41:23,831 INFO [pretrained.py:246] {'feature_dim': 80, 'encoder_out_dim': 512, 'subsampling_factor': 4, 'attention_dim': 512, 'nhead': 8, 'dim_feedforward': 2048, 'num_encoder_layers': 12, 'vgg_frontend': False, 'env_info': {'k2-version': '1.13', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'f4fefe4882bc0ae59af951da3f47335d5495ef71', 'k2-git-date': 'Thu Feb 10 15:16:02 2022', 'lhotse-version': '1.0.0.dev+missing.version.file', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '50d2281-clean', 'icefall-git-date': 'Wed Mar 2 16:02:38 2022', 'icefall-path': '/ceph-fj/fangjun/open-source-2/icefall-aishell', 'k2-path': '/ceph-fj/fangjun/open-source-2/k2-multi-datasets/k2/python/k2/__init__.py', 'lhotse-path': '/ceph-fj/fangjun/open-source-2/lhotse-aishell/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0815224919-75d558775b-mmnv8', 'IP address': '10.177.72.138'}, 'sample_rate': 16000, 'checkpoint': './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/exp/pretrained.pt', 'lang_dir': PosixPath('tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/data/lang_char'), 'method': 'modified_beam_search', 'sound_files': ['./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav'], 'beam_size': 4, 'context_size': 2, 'max_sym_per_frame': 3, 'blank_id': 0, 'vocab_size': 4336}
|
||||
2022-03-03 15:41:23,831 INFO [pretrained.py:248] About to create model
|
||||
2022-03-03 15:41:32,214 INFO [pretrained.py:257] Constructing Fbank computer
|
||||
2022-03-03 15:41:32,215 INFO [pretrained.py:267] Reading sound files: ['./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav', './tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav']
|
||||
2022-03-03 15:41:32,220 INFO [pretrained.py:273] Decoding started
|
||||
/ceph-fj/fangjun/open-source-2/icefall-aishell/egs/aishell/ASR/transducer_stateless_modified/conformer.py:113: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
||||
lengths = ((x_lens - 1) // 2 - 1) // 2
|
||||
/ceph-fj/fangjun/open-source-2/icefall-aishell/egs/aishell/ASR/transducer_stateless_modified/beam_search.py:402: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
|
||||
topk_hyp_indexes = topk_indexes // logits.size(-1)
|
||||
2022-03-03 15:41:32,583 INFO [pretrained.py:320]
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0121.wav:
|
||||
甚 至 出 现 交 易 几 乎 停 滞 的 情 况
|
||||
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0122.wav:
|
||||
一 二 线 城 市 虽 然 也 处 于 调 整 中
|
||||
|
||||
./tmp/icefall-aishell-transducer-stateless-modified-2022-03-01/test_wavs/BAC009S0764W0123.wav:
|
||||
但 因 为 聚 集 了 过 多 公 共 资 源
|
||||
|
||||
2022-03-03 15:41:32,583 INFO [pretrained.py:322] Decoding Done
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
We provide a colab notebook for this recipe showing how to use a pre-trained model to
|
||||
transcribe sound files.
|
||||
|
||||
|aishell asr stateless modified transducer colab notebook|
|
||||
|
||||
.. |aishell asr stateless modified transducer colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/12jpTxJB44vzwtcmJl2DTdznW0OawPb9H?usp=sharing
|
504
_sources/recipes/aishell/tdnn_lstm_ctc.rst.txt
Normal file
@ -0,0 +1,504 @@
|
||||
TDNN-LSTM CTC
|
||||
=============
|
||||
|
||||
This tutorial shows you how to run a TDNN-LSTM CTC model
|
||||
with the `Aishell <https://www.openslr.org/33>`_ dataset.
|
||||
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have setup
|
||||
the environment for ``icefall``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
We recommend you to use a GPU or several GPUs to run this recipe.
|
||||
|
||||
In this tutorial, you will learn:
|
||||
|
||||
- (1) How to prepare data for training and decoding
|
||||
- (2) How to start the training, either with a single GPU or multiple GPUs
|
||||
- (3) How to do decoding after training.
|
||||
- (4) How to use a pre-trained model, provided by us
|
||||
|
||||
Data preparation
|
||||
----------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||
All you need to do is to run it.
|
||||
|
||||
The data preparation contains several stages, you can use the following two
|
||||
options:
|
||||
|
||||
- ``--stage``
|
||||
- ``--stop-stage``
|
||||
|
||||
to control which stage(s) should be run. By default, all stages are executed.
|
||||
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./prepare.sh --stage 0 --stop-stage 0
|
||||
|
||||
means to run only stage 0.
|
||||
|
||||
To run stage 2 to stage 5, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./prepare.sh --stage 2 --stop-stage 5
|
||||
|
||||
.. HINT::
|
||||
|
||||
If you have pre-downloaded the `Aishell <https://www.openslr.org/33>`_
|
||||
dataset and the `musan <http://www.openslr.org/17/>`_ dataset, say,
|
||||
they are saved in ``/tmp/aishell`` and ``/tmp/musan``, you can modify
|
||||
the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
|
||||
``./prepare.sh`` won't re-download them.
|
||||
|
||||
.. HINT::
|
||||
|
||||
A 3-gram language model will be downloaded from huggingface, we assume you have
|
||||
installed and initialized ``git-lfs``. If not, you could install ``git-lfs`` by
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ sudo apt-get install git-lfs
|
||||
$ git-lfs install
|
||||
|
||||
If you don't have the ``sudo`` permission, you could download the
|
||||
`git-lfs binary <https://github.com/git-lfs/git-lfs/releases>`_ here, then add it to your ``PATH``.
|
||||
|
||||
.. NOTE::
|
||||
|
||||
All generated files by ``./prepare.sh``, e.g., features, lexicon, etc,
|
||||
are saved in ``./data`` directory.
|
||||
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
Configurable options
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/train.py --help
|
||||
|
||||
shows you the training options that can be passed from the commandline.
|
||||
The following options are used quite often:
|
||||
|
||||
|
||||
- ``--num-epochs``
|
||||
|
||||
It is the number of epochs to train. For instance,
|
||||
``./tdnn_lstm_ctc/train.py --num-epochs 30`` trains for 30 epochs
|
||||
and generates ``epoch-0.pt``, ``epoch-1.pt``, ..., ``epoch-29.pt``
|
||||
in the folder ``./tdnn_lstm_ctc/exp``.
|
||||
|
||||
- ``--start-epoch``
|
||||
|
||||
It's used to resume training.
|
||||
``./tdnn_lstm_ctc/train.py --start-epoch 10`` loads the
|
||||
checkpoint ``./tdnn_lstm_ctc/exp/epoch-9.pt`` and starts
|
||||
training from epoch 10, based on the state from epoch 9.
|
||||
|
||||
- ``--world-size``
|
||||
|
||||
It is used for multi-GPU single-machine DDP training.
|
||||
|
||||
- (a) If it is 1, then no DDP training is used.
|
||||
|
||||
- (b) If it is 2, then GPU 0 and GPU 1 are used for DDP training.
|
||||
|
||||
The following shows some use cases with it.
|
||||
|
||||
**Use case 1**: You have 4 GPUs, but you only want to use GPU 0 and
|
||||
GPU 2 for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,2"
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size 2
|
||||
|
||||
**Use case 2**: You have 4 GPUs and you want to use all of them
|
||||
for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size 4
|
||||
|
||||
**Use case 3**: You have 4 GPUs but you only want to use GPU 3
|
||||
for training. You can do the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="3"
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size 1
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
Only multi-GPU single-machine DDP training is implemented at present.
|
||||
Multi-GPU multi-machine DDP training will be added later.
|
||||
|
||||
- ``--max-duration``
|
||||
|
||||
It specifies the number of seconds over all utterances in a
|
||||
batch, before **padding**.
|
||||
If you encounter CUDA OOM, please reduce it. For instance, if
|
||||
      you are using a V100 NVIDIA GPU, we recommend setting it to ``2000``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
Due to padding, the number of seconds of all utterances in a
|
||||
batch will usually be larger than ``--max-duration``.
|
||||
|
||||
A larger value for ``--max-duration`` may cause OOM during training,
|
||||
while a smaller value may increase the training time. You have to
|
||||
tune it.
|
||||
|
||||
|
||||
Pre-configured options
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
There are some training options, e.g., weight decay,
|
||||
number of warmup steps, results dir, etc,
|
||||
that are not passed from the commandline.
|
||||
They are pre-configured by the function ``get_params()`` in
|
||||
`tdnn_lstm_ctc/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/tdnn_lstm_ctc/train.py>`_
|
||||
|
||||
You don't need to change these pre-configured parameters. If you really need to change
|
||||
them, please modify ``./tdnn_lstm_ctc/train.py`` directly.
|
||||
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
The training set is perturbed by speed with two factors: 0.9 and 1.1.
|
||||
Each epoch actually processes ``3x150 == 450`` hours of data.
|
||||
|
||||
|
||||
Training logs
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
Training logs and checkpoints are saved in ``tdnn_lstm_ctc/exp``.
|
||||
You will find the following files in that directory:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ...
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd tdnn_lstm_ctc/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --description "TDNN-LSTM CTC training for Aishell with icefall"
|
||||
|
||||
It will print something like below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
TensorFlow installation not found - running with reduced feature set.
|
||||
Upload started and will continue reading any new data as it's added to the logdir.
|
||||
|
||||
To stop uploading, press Ctrl-C.
|
||||
|
||||
New experiment created. View your TensorBoard at: https://tensorboard.dev/experiment/LJI9MWUORLOw3jkdhxwk8A/
|
||||
|
||||
[2021-09-13T11:59:23] Started scanning logdir.
|
||||
[2021-09-13T11:59:24] Total uploaded: 4454 scalars, 0 tensors, 0 binary objects
|
||||
Listening for new data in logdir...
|
||||
|
||||
Note there is a URL in the above output, click it and you will see
|
||||
the following screenshot:
|
||||
|
||||
.. figure:: images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg
|
||||
:width: 600
|
||||
:alt: TensorBoard screenshot
|
||||
:align: center
|
||||
:target: https://tensorboard.dev/experiment/LJI9MWUORLOw3jkdhxwk8A/
|
||||
|
||||
TensorBoard screenshot.
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
Usage examples
|
||||
~~~~~~~~~~~~~~
|
||||
|
||||
The following shows typical use cases:
|
||||
|
||||
**Case 1**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,3"
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size 2
|
||||
|
||||
It uses GPU 0 and GPU 3 for DDP training.
|
||||
|
||||
**Case 2**
|
||||
^^^^^^^^^^
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/train.py --num-epochs 10 --start-epoch 3
|
||||
|
||||
It loads checkpoint ``./tdnn_lstm_ctc/exp/epoch-2.pt`` and starts
|
||||
training from epoch 3. Also, it trains for 10 epochs.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/decode.py --help
|
||||
|
||||
shows the options for decoding.
|
||||
|
||||
The commonly used options are:
|
||||
|
||||
- ``--method``
|
||||
|
||||
This specifies the decoding method.
|
||||
|
||||
    The following command uses ``1best`` decoding:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/decode.py --method 1best --max-duration 100
|
||||
|
||||
- ``--max-duration``
|
||||
|
||||
It has the same meaning as the one during training. A larger
|
||||
value may cause OOM.
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded a pre-trained model to
|
||||
`<https://huggingface.co/pkufool/icefall_asr_aishell_tdnn_lstm_ctc>`_.
|
||||
|
||||
We describe how to use the pre-trained model to transcribe a sound file or
|
||||
multiple sound files in the following.
|
||||
|
||||
Install kaldifeat
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.
|
||||
|
||||
Please refer to `<https://github.com/csukuangfj/kaldifeat>`_ for installation.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The following commands describe how to download the pre-trained model:
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ mkdir tmp
|
||||
$ cd tmp
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/pkufool/icefall_asr_aishell_tdnn_lstm_ctc
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
In order to use this pre-trained model, your k2 version has to be v1.7 or later.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ tree tmp
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp/
|
||||
`-- icefall_asr_aishell_tdnn_lstm_ctc
|
||||
|-- README.md
|
||||
|-- data
|
||||
| `-- lang_phone
|
||||
| |-- HLG.pt
|
||||
| |-- tokens.txt
|
||||
| `-- words.txt
|
||||
|-- exp
|
||||
| `-- pretrained.pt
|
||||
`-- test_waves
|
||||
|-- BAC009S0764W0121.wav
|
||||
|-- BAC009S0764W0122.wav
|
||||
|-- BAC009S0764W0123.wav
|
||||
`-- trans.txt
|
||||
|
||||
5 directories, 9 files
|
||||
|
||||
**File descriptions**:
|
||||
|
||||
- ``data/lang_phone/HLG.pt``
|
||||
|
||||
It is the decoding graph.
|
||||
|
||||
- ``data/lang_phone/tokens.txt``
|
||||
|
||||
It contains tokens and their IDs.
|
||||
Provided only for convenience so that you can look up the SOS/EOS ID easily.
|
||||
|
||||
- ``data/lang_phone/words.txt``
|
||||
|
||||
It contains words and their IDs.
|
||||
|
||||
- ``exp/pretrained.pt``
|
||||
|
||||
It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from ``epoch-18.pt`` to ``epoch-40.pt``.
|
||||
Note: We have removed optimizer ``state_dict`` to reduce file size.
|
||||
|
||||
- ``test_waves/*.wav``
|
||||
|
||||
It contains some test sound files from Aishell ``test`` dataset.
|
||||
|
||||
- ``test_waves/trans.txt``
|
||||
|
||||
      It contains the reference transcripts for the sound files in ``test_waves/``.
|
||||
|
||||
The information of the test sound files is listed below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
    $ soxi tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/*.wav
|
||||
|
||||
Input File : 'tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.20 = 67263 samples ~ 315.295 CDDA sectors
|
||||
File Size : 135k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : 'tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.12 = 65840 samples ~ 308.625 CDDA sectors
|
||||
File Size : 132k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : 'tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.00 = 64000 samples ~ 300 CDDA sectors
|
||||
File Size : 128k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
Total Duration of 3 files: 00:00:12.32
|
||||
|
||||
Usage
|
||||
~~~~~
|
||||
|
||||
.. code-block::
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/pretrained.py --help
|
||||
|
||||
displays the help information.
|
||||
|
||||
|
||||
HLG decoding
|
||||
^^^^^^^^^^^^
|
||||
|
||||
HLG decoding uses the best path of the decoding lattice as the decoding result.
|
||||
|
||||
The command to run HLG decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/pretrained.py \
|
||||
--checkpoint ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/exp/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/data/lang_phone/HLG.pt \
|
||||
--method 1best \
|
||||
        ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav \
|
||||
        ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav \
|
||||
        ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav
|
||||
|
||||
The output is given below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-09-13 15:00:55,858 INFO [pretrained.py:140] device: cuda:0
|
||||
2021-09-13 15:00:55,858 INFO [pretrained.py:142] Creating model
|
||||
2021-09-13 15:01:05,389 INFO [pretrained.py:154] Loading HLG from ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/data/lang_phone/HLG.pt
|
||||
2021-09-13 15:01:06,531 INFO [pretrained.py:161] Constructing Fbank computer
|
||||
2021-09-13 15:01:06,536 INFO [pretrained.py:171] Reading sound files: ['./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav', './tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav', './tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav']
|
||||
2021-09-13 15:01:06,539 INFO [pretrained.py:177] Decoding started
|
||||
2021-09-13 15:01:06,917 INFO [pretrained.py:207] Use HLG decoding
|
||||
2021-09-13 15:01:07,129 INFO [pretrained.py:220]
|
||||
./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav:
|
||||
甚至 出现 交易 几乎 停滞 的 情况
|
||||
|
||||
./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav:
|
||||
一二 线 城市 虽然 也 处于 调整 中
|
||||
|
||||
./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav:
|
||||
但 因为 聚集 了 过多 公共 资源
|
||||
|
||||
|
||||
2021-09-13 15:01:07,129 INFO [pretrained.py:222] Decoding Done
|
||||
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
We do provide a colab notebook for this recipe showing how to use a pre-trained model.
|
||||
|
||||
|aishell asr conformer ctc colab notebook|
|
||||
|
||||
.. |aishell asr conformer ctc colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/1qULaGvXq7PCu_P61oubfz9b53JzY4H3z
|
||||
|
||||
**Congratulations!** You have finished the aishell ASR recipe with
|
||||
TDNN-LSTM CTC models in ``icefall``.
|
19
_sources/recipes/index.rst.txt
Normal file
@ -0,0 +1,19 @@
|
||||
Recipes
|
||||
=======
|
||||
|
||||
This page contains various recipes in ``icefall``.
|
||||
Currently, only speech recognition recipes are provided.
|
||||
|
||||
We may add recipes for other tasks as well in the future.
|
||||
|
||||
.. we put the yesno recipe as the first recipe since it is the simplest one.
|
||||
.. Other recipes are listed in alphabetical order.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Table of Contents
|
||||
|
||||
aishell/index
|
||||
librispeech/index
|
||||
timit/index
|
||||
yesno/index
|
1070
_sources/recipes/librispeech/conformer_ctc.rst.txt
Normal file
8
_sources/recipes/librispeech/index.rst.txt
Normal file
@ -0,0 +1,8 @@
|
||||
LibriSpeech
|
||||
===========
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
tdnn_lstm_ctc
|
||||
conformer_ctc
|
404
_sources/recipes/librispeech/tdnn_lstm_ctc.rst.txt
Normal file
@ -0,0 +1,404 @@
|
||||
TDNN-LSTM-CTC
|
||||
=============
|
||||
|
||||
This tutorial shows you how to run a TDNN-LSTM-CTC model with the `LibriSpeech <https://www.openslr.org/12>`_ dataset.
|
||||
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have setup
|
||||
the environment for ``icefall``.
|
||||
|
||||
|
||||
Data preparation
|
||||
----------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/librispeech/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||
All you need to do is to run it.
|
||||
|
||||
The data preparation contains several stages, you can use the following two
|
||||
options:
|
||||
|
||||
- ``--stage``
|
||||
- ``--stop-stage``
|
||||
|
||||
to control which stage(s) should be run. By default, all stages are executed.
|
||||
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/librispeech/ASR
|
||||
$ ./prepare.sh --stage 0 --stop-stage 0
|
||||
|
||||
means to run only stage 0.
|
||||
|
||||
To run stage 2 to stage 5, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./prepare.sh --stage 2 --stop-stage 5
|
||||
|
||||
We provide the following YouTube video showing how to run ``./prepare.sh``.
|
||||
|
||||
.. note::
|
||||
|
||||
To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe
|
||||
the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
|
||||
|
||||
`<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
|
||||
|
||||
.. youtube:: ofEIoJL-mGM
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
We now describe the training of the TDNN-LSTM-CTC model, contained in
|
||||
the `tdnn_lstm_ctc <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/tdnn_lstm_ctc>`_
|
||||
folder.
|
||||
|
||||
The command to run the training part is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/librispeech/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0,1,2,3"
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size 4
|
||||
|
||||
By default, it will run ``20`` epochs. Training logs and checkpoints are saved
|
||||
in ``tdnn_lstm_ctc/exp``.
|
||||
|
||||
In ``tdnn_lstm_ctc/exp``, you will find the following files:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ..., ``epoch-19.pt``
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd tdnn_lstm_ctc/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --description "TDNN LSTM training for librispeech with icefall"
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
|
||||
To see available training options, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/train.py --help
|
||||
|
||||
Other training options, e.g., learning rate, results dir, etc., are
|
||||
pre-configured in the function ``get_params()``
|
||||
in `tdnn_lstm_ctc/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/tdnn_lstm_ctc/train.py>`_.
|
||||
Normally, you don't need to change them. You can change them by modifying the code, if
|
||||
you want.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
The command for decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES="0"
|
||||
$ ./tdnn_lstm_ctc/decode.py
|
||||
|
||||
You will see the WER in the output log.
|
||||
|
||||
Decoded results are saved in ``tdnn_lstm_ctc/exp``.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/decode.py --help
|
||||
|
||||
shows you the available decoding options.
|
||||
|
||||
Some commonly used options are:
|
||||
|
||||
- ``--epoch``
|
||||
|
||||
You can select which checkpoint to be used for decoding.
|
||||
For instance, ``./tdnn_lstm_ctc/decode.py --epoch 10`` means to use
|
||||
``./tdnn_lstm_ctc/exp/epoch-10.pt`` for decoding.
|
||||
|
||||
- ``--avg``
|
||||
|
||||
It's related to model averaging. It specifies number of checkpoints
|
||||
to be averaged. The averaged model is used for decoding.
|
||||
For example, the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/decode.py --epoch 10 --avg 3
|
||||
|
||||
uses the average of ``epoch-8.pt``, ``epoch-9.pt`` and ``epoch-10.pt``
|
||||
for decoding.
|
||||
|
||||
- ``--export``
|
||||
|
||||
If it is ``True``, i.e., ``./tdnn_lstm_ctc/decode.py --export 1``, the code
|
||||
will save the averaged model to ``tdnn_lstm_ctc/exp/pretrained.pt``.
|
||||
See :ref:`tdnn_lstm_ctc use a pre-trained model` for how to use it.
|
||||
|
||||
|
||||
.. _tdnn_lstm_ctc use a pre-trained model:
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded the pre-trained model to
|
||||
`<https://huggingface.co/pkufool/icefall_asr_librispeech_tdnn-lstm_ctc>`_.
|
||||
|
||||
The following shows you how to use the pre-trained model.
|
||||
|
||||
|
||||
Install kaldifeat
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.
|
||||
|
||||
Please refer to `<https://github.com/csukuangfj/kaldifeat>`_ for installation.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/librispeech/ASR
|
||||
$ mkdir tmp
|
||||
$ cd tmp
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/pkufool/icefall_asr_librispeech_tdnn-lstm_ctc
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
In order to use this pre-trained model, your k2 version has to be v1.7 or later.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/librispeech/ASR
|
||||
$ tree tmp
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp/
|
||||
`-- icefall_asr_librispeech_tdnn-lstm_ctc
|
||||
|-- README.md
|
||||
|-- data
|
||||
| |-- lang_phone
|
||||
| | |-- HLG.pt
|
||||
| | |-- tokens.txt
|
||||
| | `-- words.txt
|
||||
| `-- lm
|
||||
| `-- G_4_gram.pt
|
||||
|-- exp
|
||||
| `-- pretrained.pt
|
||||
`-- test_wavs
|
||||
|-- 1089-134686-0001.flac
|
||||
|-- 1221-135766-0001.flac
|
||||
|-- 1221-135766-0002.flac
|
||||
`-- trans.txt
|
||||
|
||||
6 directories, 10 files
|
||||
|
||||
**File descriptions**:
|
||||
|
||||
- ``data/lang_phone/HLG.pt``
|
||||
|
||||
It is the decoding graph.
|
||||
|
||||
- ``data/lang_phone/tokens.txt``
|
||||
|
||||
It contains tokens and their IDs.
|
||||
|
||||
- ``data/lang_phone/words.txt``
|
||||
|
||||
It contains words and their IDs.
|
||||
|
||||
- ``data/lm/G_4_gram.pt``
|
||||
|
||||
It is a 4-gram LM, useful for LM rescoring.
|
||||
|
||||
- ``exp/pretrained.pt``
|
||||
|
||||
It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from ``epoch-14.pt`` to ``epoch-19.pt``.
|
||||
Note: We have removed optimizer ``state_dict`` to reduce file size.
|
||||
|
||||
  - ``test_wavs/*.flac``
|
||||
|
||||
It contains some test sound files from LibriSpeech ``test-clean`` dataset.
|
||||
|
||||
  - ``test_wavs/trans.txt``
|
||||
|
||||
      It contains the reference transcripts for the sound files in ``test_wavs/``.
|
||||
|
||||
The information of the test sound files is listed below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ soxi tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/*.flac
|
||||
|
||||
Input File : 'tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:06.62 = 106000 samples ~ 496.875 CDDA sectors
|
||||
File Size : 116k
|
||||
Bit Rate : 140k
|
||||
Sample Encoding: 16-bit FLAC
|
||||
|
||||
|
||||
Input File : 'tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:16.71 = 267440 samples ~ 1253.62 CDDA sectors
|
||||
File Size : 343k
|
||||
Bit Rate : 164k
|
||||
Sample Encoding: 16-bit FLAC
|
||||
|
||||
|
||||
Input File : 'tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac'
|
||||
Channels : 1
|
||||
Sample Rate : 16000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:04.83 = 77200 samples ~ 361.875 CDDA sectors
|
||||
File Size : 105k
|
||||
Bit Rate : 174k
|
||||
Sample Encoding: 16-bit FLAC
|
||||
|
||||
Total Duration of 3 files: 00:00:28.16
|
||||
|
||||
|
||||
Inference with a pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/librispeech/ASR
|
||||
$ ./tdnn_lstm_ctc/pretrained.py --help
|
||||
|
||||
shows the usage information of ``./tdnn_lstm_ctc/pretrained.py``.
|
||||
|
||||
To decode with ``1best`` method, we can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn_lstm_ctc/pretrained.py \
|
||||
      --checkpoint ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/exp/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lang_phone/HLG.pt \
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac \
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac \
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac
|
||||
|
||||
The output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-24 16:57:13,315 INFO [pretrained.py:168] device: cuda:0
|
||||
2021-08-24 16:57:13,315 INFO [pretrained.py:170] Creating model
|
||||
2021-08-24 16:57:18,331 INFO [pretrained.py:182] Loading HLG from ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lang_phone/HLG.pt
|
||||
2021-08-24 16:57:27,581 INFO [pretrained.py:199] Constructing Fbank computer
|
||||
2021-08-24 16:57:27,584 INFO [pretrained.py:209] Reading sound files: ['./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac']
|
||||
2021-08-24 16:57:27,599 INFO [pretrained.py:215] Decoding started
|
||||
2021-08-24 16:57:27,791 INFO [pretrained.py:245] Use HLG decoding
|
||||
2021-08-24 16:57:28,098 INFO [pretrained.py:266]
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac:
|
||||
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac:
|
||||
GOD AS A DIRECT CONSEQUENCE OF THE SIN WHICH MAN THUS PUNISHED HAD GIVEN HER A LOVELY CHILD WHOSE PLACE WAS ON THAT SAME DISHONORED BOSOM TO CONNECT HER PARENT FOREVER WITH THE RACE AND DESCENT OF MORTALS AND TO BE FINALLY A BLESSED SOUL IN HEAVEN
|
||||
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac:
|
||||
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||
|
||||
|
||||
2021-08-24 16:57:28,099 INFO [pretrained.py:268] Decoding Done
|
||||
|
||||
|
||||
  To decode with the ``whole-lattice-rescoring`` method, you can use
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn_lstm_ctc/pretrained.py \
|
||||
      --checkpoint ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/exp/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lang_phone/HLG.pt \
|
||||
--method whole-lattice-rescoring \
|
||||
--G ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lm/G_4_gram.pt \
|
||||
--ngram-lm-scale 0.8 \
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac \
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac \
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac
|
||||
|
||||
The decoding output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-24 16:39:24,725 INFO [pretrained.py:168] device: cuda:0
|
||||
2021-08-24 16:39:24,725 INFO [pretrained.py:170] Creating model
|
||||
2021-08-24 16:39:29,403 INFO [pretrained.py:182] Loading HLG from ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lang_phone/HLG.pt
|
||||
2021-08-24 16:39:40,631 INFO [pretrained.py:190] Loading G from ./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/data/lm/G_4_gram.pt
|
||||
2021-08-24 16:39:53,098 INFO [pretrained.py:199] Constructing Fbank computer
|
||||
2021-08-24 16:39:53,107 INFO [pretrained.py:209] Reading sound files: ['./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac', './tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac', './tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac']
|
||||
2021-08-24 16:39:53,121 INFO [pretrained.py:215] Decoding started
|
||||
2021-08-24 16:39:53,443 INFO [pretrained.py:250] Use HLG decoding + LM rescoring
|
||||
2021-08-24 16:39:54,010 INFO [pretrained.py:266]
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1089-134686-0001.flac:
|
||||
AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
|
||||
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0001.flac:
|
||||
GOD AS A DIRECT CONSEQUENCE OF THE SIN WHICH MAN THUS PUNISHED HAD GIVEN HER A LOVELY CHILD WHOSE PLACE WAS ON THAT SAME DISHONORED BOSOM TO CONNECT HER PARENT FOREVER WITH THE RACE AND DESCENT OF MORTALS AND TO BE FINALLY A BLESSED SOUL IN HEAVEN
|
||||
|
||||
./tmp/icefall_asr_librispeech_tdnn-lstm_ctc/test_wavs/1221-135766-0002.flac:
|
||||
YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
|
||||
|
||||
|
||||
2021-08-24 16:39:54,010 INFO [pretrained.py:268] Decoding Done
|
||||
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
  We provide a colab notebook for decoding with a pre-trained model.
|
||||
|
||||
|librispeech tdnn_lstm_ctc colab notebook|
|
||||
|
||||
.. |librispeech tdnn_lstm_ctc colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/1kNmDXNMwREi0rZGAOIAOJo93REBuOTcd
|
||||
|
||||
|
||||
**Congratulations!** You have finished the TDNN-LSTM-CTC recipe on librispeech in ``icefall``.
|
9
_sources/recipes/timit/index.rst.txt
Normal file
@ -0,0 +1,9 @@
|
||||
TIMIT
|
||||
=====
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
tdnn_ligru_ctc
|
||||
tdnn_lstm_ctc
|
||||
|
406
_sources/recipes/timit/tdnn_ligru_ctc.rst.txt
Normal file
@ -0,0 +1,406 @@
|
||||
TDNN-LiGRU-CTC
|
||||
==============
|
||||
|
||||
This tutorial shows you how to run a TDNN-LiGRU-CTC model with the `TIMIT <https://data.deepai.org/timit.zip>`_ dataset.
|
||||
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have setup
|
||||
the environment for ``icefall``.
|
||||
|
||||
|
||||
Data preparation
|
||||
----------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||
All you need to do is to run it.
|
||||
|
||||
The data preparation contains several stages, you can use the following two
|
||||
options:
|
||||
|
||||
- ``--stage``
|
||||
- ``--stop-stage``
|
||||
|
||||
to control which stage(s) should be run. By default, all stages are executed.
|
||||
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ ./prepare.sh --stage 0 --stop-stage 0
|
||||
|
||||
means to run only stage 0.
|
||||
|
||||
To run stage 2 to stage 5, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./prepare.sh --stage 2 --stop-stage 5
|
||||
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
We now describe the training of the TDNN-LiGRU-CTC model, contained in
|
||||
the `tdnn_ligru_ctc <https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_ligru_ctc>`_
|
||||
folder.
|
||||
|
||||
.. HINT::
|
||||
|
||||
TIMIT is a very small dataset. So one GPU is enough.
|
||||
|
||||
The command to run the training part is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0"
|
||||
$ ./tdnn_ligru_ctc/train.py
|
||||
|
||||
By default, it will run ``25`` epochs. Training logs and checkpoints are saved
|
||||
in ``tdnn_ligru_ctc/exp``.
|
||||
|
||||
In ``tdnn_ligru_ctc/exp``, you will find the following files:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ..., ``epoch-25.pt``
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_ligru_ctc/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd tdnn_ligru_ctc/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --description "TDNN ligru training for timit with icefall"
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
|
||||
To see available training options, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_ligru_ctc/train.py --help
|
||||
|
||||
Other training options, e.g., learning rate, results dir, etc., are
|
||||
pre-configured in the function ``get_params()``
|
||||
in `tdnn_ligru_ctc/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/timit/ASR/tdnn_ligru_ctc/train.py>`_.
|
||||
Normally, you don't need to change them. You can change them by modifying the code, if
|
||||
you want.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
The command for decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES="0"
|
||||
$ ./tdnn_ligru_ctc/decode.py
|
||||
|
||||
You will see the WER in the output log.
|
||||
|
||||
Decoded results are saved in ``tdnn_ligru_ctc/exp``.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_ligru_ctc/decode.py --help
|
||||
|
||||
shows you the available decoding options.
|
||||
|
||||
Some commonly used options are:
|
||||
|
||||
- ``--epoch``
|
||||
|
||||
You can select which checkpoint to be used for decoding.
|
||||
For instance, ``./tdnn_ligru_ctc/decode.py --epoch 10`` means to use
|
||||
``./tdnn_ligru_ctc/exp/epoch-10.pt`` for decoding.
|
||||
|
||||
- ``--avg``
|
||||
|
||||
It's related to model averaging. It specifies number of checkpoints
|
||||
to be averaged. The averaged model is used for decoding.
|
||||
For example, the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_ligru_ctc/decode.py --epoch 25 --avg 17
|
||||
|
||||
uses the average of ``epoch-9.pt``, ``epoch-10.pt``, ``epoch-11.pt``,
|
||||
``epoch-12.pt``, ``epoch-13.pt``, ``epoch-14.pt``, ``epoch-15.pt``,
|
||||
``epoch-16.pt``, ``epoch-17.pt``, ``epoch-18.pt``, ``epoch-19.pt``,
|
||||
``epoch-20.pt``, ``epoch-21.pt``, ``epoch-22.pt``, ``epoch-23.pt``,
|
||||
``epoch-24.pt`` and ``epoch-25.pt``
|
||||
for decoding.
|
||||
|
||||
- ``--export``
|
||||
|
||||
If it is ``True``, i.e., ``./tdnn_ligru_ctc/decode.py --export 1``, the code
|
||||
will save the averaged model to ``tdnn_ligru_ctc/exp/pretrained.pt``.
|
||||
See :ref:`tdnn_ligru_ctc use a pre-trained model` for how to use it.
|
||||
|
||||
|
||||
.. _tdnn_ligru_ctc use a pre-trained model:
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded the pre-trained model to
|
||||
`<https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_ligru_ctc>`_.
|
||||
|
||||
The following shows you how to use the pre-trained model.
|
||||
|
||||
|
||||
Install kaldifeat
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.
|
||||
|
||||
Please refer to `<https://github.com/csukuangfj/kaldifeat>`_ for installation.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ mkdir tmp-ligru
|
||||
$ cd tmp-ligru
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_ligru_ctc
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
In order to use this pre-trained model, your k2 version has to be v1.7 or later.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ tree tmp-ligru
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp-ligru/
|
||||
`-- icefall_asr_timit_tdnn_ligru_ctc
|
||||
|-- README.md
|
||||
|-- data
|
||||
| |-- lang_phone
|
||||
| | |-- HLG.pt
|
||||
| | |-- tokens.txt
|
||||
| | `-- words.txt
|
||||
| `-- lm
|
||||
| `-- G_4_gram.pt
|
||||
|-- exp
|
||||
| `-- pretrained_average_9_25.pt
|
||||
`-- test_waves
|
||||
|-- FDHC0_SI1559.WAV
|
||||
|-- FELC0_SI756.WAV
|
||||
|-- FMGD0_SI1564.WAV
|
||||
`-- trans.txt
|
||||
|
||||
6 directories, 10 files
|
||||
|
||||
**File descriptions**:
|
||||
|
||||
- ``data/lang_phone/HLG.pt``
|
||||
|
||||
It is the decoding graph.
|
||||
|
||||
- ``data/lang_phone/tokens.txt``
|
||||
|
||||
It contains tokens and their IDs.
|
||||
|
||||
- ``data/lang_phone/words.txt``
|
||||
|
||||
It contains words and their IDs.
|
||||
|
||||
- ``data/lm/G_4_gram.pt``
|
||||
|
||||
It is a 4-gram LM, useful for LM rescoring.
|
||||
|
||||
- ``exp/pretrained_average_9_25.pt``
|
||||
|
||||
It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from ``epoch-9.pt`` to ``epoch-25.pt``.
|
||||
Note: We have removed optimizer ``state_dict`` to reduce file size.
|
||||
|
||||
- ``test_waves/*.WAV``
|
||||
|
||||
It contains some test sound files from timit ``TEST`` dataset.
|
||||
|
||||
- ``test_waves/trans.txt``
|
||||
|
||||
It contains the reference transcripts for the sound files in ``test_waves/``.
|
||||
|
||||
The information of the test sound files is listed below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ffprobe -show_format tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV
|
||||
|
||||
Input #0, nistsphere, from 'tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV':
|
||||
Metadata:
|
||||
database_id : TIMIT
|
||||
database_version: 1.0
|
||||
utterance_id : dhc0_si1559
|
||||
sample_min : -4176
|
||||
sample_max : 5984
|
||||
Duration: 00:00:03.40, bitrate: 258 kb/s
|
||||
Stream #0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s
|
||||
|
||||
$ ffprobe -show_format tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV
|
||||
|
||||
Input #0, nistsphere, from 'tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV':
|
||||
Metadata:
|
||||
database_id : TIMIT
|
||||
database_version: 1.0
|
||||
utterance_id : elc0_si756
|
||||
sample_min : -1546
|
||||
sample_max : 1989
|
||||
Duration: 00:00:04.19, bitrate: 257 kb/s
|
||||
Stream #0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s
|
||||
|
||||
$ ffprobe -show_format tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV
|
||||
|
||||
Input #0, nistsphere, from 'tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV':
|
||||
Metadata:
|
||||
database_id : TIMIT
|
||||
database_version: 1.0
|
||||
utterance_id : mgd0_si1564
|
||||
sample_min : -7626
|
||||
sample_max : 10573
|
||||
Duration: 00:00:04.44, bitrate: 257 kb/s
|
||||
Stream #0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s
|
||||
|
||||
|
||||
Inference with a pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ ./tdnn_ligru_ctc/pretrained.py --help
|
||||
|
||||
shows the usage information of ``./tdnn_ligru_ctc/pretrained.py``.
|
||||
|
||||
To decode with ``1best`` method, we can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn_ligru_ctc/pretrained.py \
|
||||
--method 1best \
|
||||
--checkpoint ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/exp/pretrained_average_9_25.pt \
|
||||
--words-file ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lang_phone/HLG.pt \
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV \
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV \
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV
|
||||
|
||||
The output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-08 20:41:33,660 INFO [pretrained.py:169] device: cuda:0
|
||||
2021-11-08 20:41:33,660 INFO [pretrained.py:171] Creating model
|
||||
2021-11-08 20:41:38,680 INFO [pretrained.py:183] Loading HLG from ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lang_phone/HLG.pt
|
||||
2021-11-08 20:41:38,695 INFO [pretrained.py:200] Constructing Fbank computer
|
||||
2021-11-08 20:41:38,697 INFO [pretrained.py:210] Reading sound files: ['./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV', './tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV', './tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV']
|
||||
2021-11-08 20:41:38,704 INFO [pretrained.py:216] Decoding started
|
||||
2021-11-08 20:41:39,819 INFO [pretrained.py:246] Use HLG decoding
|
||||
2021-11-08 20:41:39,829 INFO [pretrained.py:267]
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV:
|
||||
sil dh ih sh uw ah l iy v iy z ih sil p r aa sil k s ih m ey dx ih sil d w uh dx ih w ih s f iy l ih ng w ih th ih n ih m s eh l f sil jh
|
||||
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV:
|
||||
sil m ih sil t ih r iy s sil s er r ih m ih sil m aa l ih sil k l ey sil r eh sil d w ay sil d aa r sil b ah f sil jh
|
||||
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV:
|
||||
sil hh ah z sil b ih sil g r iy w ah z sil d aw n ih sil b ay s sil n ey sil w eh l f eh n s ih z eh n dh eh r w er sil g r ey z ih ng sil k ae dx l sil
|
||||
|
||||
|
||||
2021-11-08 20:41:39,829 INFO [pretrained.py:269] Decoding Done
|
||||
|
||||
|
||||
To decode with the ``whole-lattice-rescoring`` method, you can use
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn_ligru_ctc/pretrained.py \
|
||||
--method whole-lattice-rescoring \
|
||||
--checkpoint ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/exp/pretrained_average_9_25.pt \
|
||||
--words-file ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lang_phone/HLG.pt \
|
||||
--G ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lm/G_4_gram.pt \
|
||||
--ngram-lm-scale 0.1 \
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV
|
||||
|
||||
The decoding output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-08 20:37:50,693 INFO [pretrained.py:169] device: cuda:0
|
||||
2021-11-08 20:37:50,693 INFO [pretrained.py:171] Creating model
|
||||
2021-11-08 20:37:54,693 INFO [pretrained.py:183] Loading HLG from ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lang_phone/HLG.pt
|
||||
2021-11-08 20:37:54,705 INFO [pretrained.py:191] Loading G from ./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/data/lm/G_4_gram.pt
|
||||
2021-11-08 20:37:54,714 INFO [pretrained.py:200] Constructing Fbank computer
|
||||
2021-11-08 20:37:54,715 INFO [pretrained.py:210] Reading sound files: ['./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV', './tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV', './tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV']
|
||||
2021-11-08 20:37:54,720 INFO [pretrained.py:216] Decoding started
|
||||
2021-11-08 20:37:55,808 INFO [pretrained.py:251] Use HLG decoding + LM rescoring
|
||||
2021-11-08 20:37:56,348 INFO [pretrained.py:267]
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FDHC0_SI1559.WAV:
|
||||
sil dh ih sh uw ah l iy v iy z ah sil p r aa sil k s ih m ey dx ih sil d w uh dx iy w ih s f iy l iy ng w ih th ih n ih m s eh l f sil jh
|
||||
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FELC0_SI756.WAV:
|
||||
sil m ih sil t ih r iy l s sil s er r eh m ih sil m aa l ih ng sil k l ey sil r eh sil d w ay sil d aa r sil b ah f sil jh ch
|
||||
|
||||
./tmp-ligru/icefall_asr_timit_tdnn_ligru_ctc/test_waves/FMGD0_SI1564.WAV:
|
||||
sil hh ah z sil b ih n sil g r iy w ah z sil b aw n ih sil b ay s sil n ey sil w er l f eh n s ih z eh n dh eh r w er sil g r ey z ih ng sil k ae dx l sil
|
||||
|
||||
|
||||
2021-11-08 20:37:56,348 INFO [pretrained.py:269] Decoding Done
|
||||
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
We provide a colab notebook for decoding with a pre-trained model.
|
||||
|
||||
|timit tdnn_ligru_ctc colab notebook|
|
||||
|
||||
.. |timit tdnn_ligru_ctc colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/11IT-k4HQIgQngXz1uvWsEYktjqQt7Tmb
|
||||
|
||||
|
||||
**Congratulations!** You have finished the TDNN-LiGRU-CTC recipe on timit in ``icefall``.
|
404
_sources/recipes/timit/tdnn_lstm_ctc.rst.txt
Normal file
@ -0,0 +1,404 @@
|
||||
TDNN-LSTM-CTC
|
||||
=============
|
||||
|
||||
This tutorial shows you how to run a TDNN-LSTM-CTC model with the `TIMIT <https://data.deepai.org/timit.zip>`_ dataset.
|
||||
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have setup
|
||||
the environment for ``icefall``.
|
||||
|
||||
|
||||
Data preparation
|
||||
----------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||
All you need to do is to run it.
|
||||
|
||||
The data preparation contains several stages, you can use the following two
|
||||
options:
|
||||
|
||||
- ``--stage``
|
||||
- ``--stop-stage``
|
||||
|
||||
to control which stage(s) should be run. By default, all stages are executed.
|
||||
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ ./prepare.sh --stage 0 --stop-stage 0
|
||||
|
||||
means to run only stage 0.
|
||||
|
||||
To run stage 2 to stage 5, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./prepare.sh --stage 2 --stop-stage 5
|
||||
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
We now describe the training of the TDNN-LSTM-CTC model, contained in
|
||||
the `tdnn_lstm_ctc <https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_lstm_ctc>`_
|
||||
folder.
|
||||
|
||||
.. HINT::
|
||||
|
||||
TIMIT is a very small dataset. So one GPU for training is enough.
|
||||
|
||||
The command to run the training part is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES="0"
|
||||
$ ./tdnn_lstm_ctc/train.py
|
||||
|
||||
By default, it will run ``25`` epochs. Training logs and checkpoints are saved
|
||||
in ``tdnn_lstm_ctc/exp``.
|
||||
|
||||
In ``tdnn_lstm_ctc/exp``, you will find the following files:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ..., ``epoch-25.pt``
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd tdnn_lstm_ctc/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --description "TDNN LSTM training for timit with icefall"
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
|
||||
To see available training options, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/train.py --help
|
||||
|
||||
Other training options, e.g., learning rate, results dir, etc., are
|
||||
pre-configured in the function ``get_params()``
|
||||
in `tdnn_lstm_ctc/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/timit/ASR/tdnn_lstm_ctc/train.py>`_.
|
||||
Normally, you don't need to change them. You can change them by modifying the code, if
|
||||
you want.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
The command for decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES="0"
|
||||
$ ./tdnn_lstm_ctc/decode.py
|
||||
|
||||
You will see the WER in the output log.
|
||||
|
||||
Decoded results are saved in ``tdnn_lstm_ctc/exp``.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/decode.py --help
|
||||
|
||||
shows you the available decoding options.
|
||||
|
||||
Some commonly used options are:
|
||||
|
||||
- ``--epoch``
|
||||
|
||||
You can select which checkpoint to be used for decoding.
|
||||
For instance, ``./tdnn_lstm_ctc/decode.py --epoch 10`` means to use
|
||||
``./tdnn_lstm_ctc/exp/epoch-10.pt`` for decoding.
|
||||
|
||||
- ``--avg``
|
||||
|
||||
It's related to model averaging. It specifies number of checkpoints
|
||||
to be averaged. The averaged model is used for decoding.
|
||||
For example, the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn_lstm_ctc/decode.py --epoch 25 --avg 10
|
||||
|
||||
uses the average of ``epoch-16.pt``, ``epoch-17.pt``, ``epoch-18.pt``,
|
||||
``epoch-19.pt``, ``epoch-20.pt``, ``epoch-21.pt``, ``epoch-22.pt``,
|
||||
``epoch-23.pt``, ``epoch-24.pt`` and ``epoch-25.pt``
|
||||
for decoding.
|
||||
|
||||
- ``--export``
|
||||
|
||||
If it is ``True``, i.e., ``./tdnn_lstm_ctc/decode.py --export 1``, the code
|
||||
will save the averaged model to ``tdnn_lstm_ctc/exp/pretrained.pt``.
|
||||
See :ref:`tdnn_lstm_ctc use a pre-trained model` for how to use it.
|
||||
|
||||
|
||||
.. _tdnn_lstm_ctc use a pre-trained model:
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded the pre-trained model to
|
||||
`<https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_lstm_ctc>`_.
|
||||
|
||||
The following shows you how to use the pre-trained model.
|
||||
|
||||
|
||||
Install kaldifeat
|
||||
~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.
|
||||
|
||||
Please refer to `<https://github.com/csukuangfj/kaldifeat>`_ for installation.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ mkdir tmp-lstm
|
||||
$ cd tmp-lstm
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/luomingshuang/icefall_asr_timit_tdnn_lstm_ctc
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
In order to use this pre-trained model, your k2 version has to be v1.7 or later.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ tree tmp-lstm
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp-lstm/
|
||||
`-- icefall_asr_timit_tdnn_lstm_ctc
|
||||
|-- README.md
|
||||
|-- data
|
||||
| |-- lang_phone
|
||||
| | |-- HLG.pt
|
||||
| | |-- tokens.txt
|
||||
| | `-- words.txt
|
||||
| `-- lm
|
||||
| `-- G_4_gram.pt
|
||||
|-- exp
|
||||
| `-- pretrained_average_16_25.pt
|
||||
`-- test_waves
|
||||
|-- FDHC0_SI1559.WAV
|
||||
|-- FELC0_SI756.WAV
|
||||
|-- FMGD0_SI1564.WAV
|
||||
`-- trans.txt
|
||||
|
||||
6 directories, 10 files
|
||||
|
||||
**File descriptions**:
|
||||
|
||||
- ``data/lang_phone/HLG.pt``
|
||||
|
||||
It is the decoding graph.
|
||||
|
||||
- ``data/lang_phone/tokens.txt``
|
||||
|
||||
It contains tokens and their IDs.
|
||||
|
||||
- ``data/lang_phone/words.txt``
|
||||
|
||||
It contains words and their IDs.
|
||||
|
||||
- ``data/lm/G_4_gram.pt``
|
||||
|
||||
It is a 4-gram LM, useful for LM rescoring.
|
||||
|
||||
- ``exp/pretrained_average_16_25.pt``
|
||||
|
||||
It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from ``epoch-16.pt`` to ``epoch-25.pt``.
|
||||
Note: We have removed optimizer ``state_dict`` to reduce file size.
|
||||
|
||||
- ``test_waves/*.WAV``
|
||||
|
||||
It contains some test sound files from timit ``TEST`` dataset.
|
||||
|
||||
- ``test_waves/trans.txt``
|
||||
|
||||
It contains the reference transcripts for the sound files in ``test_waves/``.
|
||||
|
||||
The information of the test sound files is listed below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ffprobe -show_format tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV
|
||||
|
||||
Input #0, nistsphere, from 'tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV':
|
||||
Metadata:
|
||||
database_id : TIMIT
|
||||
database_version: 1.0
|
||||
utterance_id : dhc0_si1559
|
||||
sample_min : -4176
|
||||
sample_max : 5984
|
||||
Duration: 00:00:03.40, bitrate: 258 kb/s
|
||||
Stream #0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s
|
||||
|
||||
$ ffprobe -show_format tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV
|
||||
|
||||
Input #0, nistsphere, from 'tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV':
|
||||
Metadata:
|
||||
database_id : TIMIT
|
||||
database_version: 1.0
|
||||
utterance_id : elc0_si756
|
||||
sample_min : -1546
|
||||
sample_max : 1989
|
||||
Duration: 00:00:04.19, bitrate: 257 kb/s
|
||||
Stream #0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s
|
||||
|
||||
$ ffprobe -show_format tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV
|
||||
|
||||
Input #0, nistsphere, from 'tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV':
|
||||
Metadata:
|
||||
database_id : TIMIT
|
||||
database_version: 1.0
|
||||
utterance_id : mgd0_si1564
|
||||
sample_min : -7626
|
||||
sample_max : 10573
|
||||
Duration: 00:00:04.44, bitrate: 257 kb/s
|
||||
Stream #0:0: Audio: pcm_s16le, 16000 Hz, 1 channels, s16, 256 kb/s
|
||||
|
||||
|
||||
Inference with a pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/timit/ASR
|
||||
$ ./tdnn_lstm_ctc/pretrained.py --help
|
||||
|
||||
shows the usage information of ``./tdnn_lstm_ctc/pretrained.py``.
|
||||
|
||||
To decode with ``1best`` method, we can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn_lstm_ctc/pretrained.py \
|
||||
--method 1best \
|
||||
--checkpoint ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/exp/pretrained_average_16_25.pt \
|
||||
--words-file ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/HLG.pt \
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV \
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV \
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV
|
||||
|
||||
The output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-08 21:02:49,583 INFO [pretrained.py:169] device: cuda:0
|
||||
2021-11-08 21:02:49,584 INFO [pretrained.py:171] Creating model
|
||||
2021-11-08 21:02:53,816 INFO [pretrained.py:183] Loading HLG from ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/HLG.pt
|
||||
2021-11-08 21:02:53,827 INFO [pretrained.py:200] Constructing Fbank computer
|
||||
2021-11-08 21:02:53,827 INFO [pretrained.py:210] Reading sound files: ['./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV', './tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV', './tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV']
|
||||
2021-11-08 21:02:53,831 INFO [pretrained.py:216] Decoding started
|
||||
2021-11-08 21:02:54,380 INFO [pretrained.py:246] Use HLG decoding
|
||||
2021-11-08 21:02:54,387 INFO [pretrained.py:267]
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV:
|
||||
sil dh ih sh uw ah l iy v iy z ih sil p r aa sil k s ih m ey dx ih sil d w uh dx iy w ih s f iy l iy w ih th ih n ih m s eh l f sil jh
|
||||
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV:
|
||||
sil dh ih sil t ih r ih s sil s er r ih m ih sil m aa l ih ng sil k l ey sil r eh sil d w ay sil d aa r sil b ah f sil <UNK> jh
|
||||
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV:
|
||||
sil hh ae z sil b ih n iy w ah z sil b ae n ih sil b ay s sil n ey sil k eh l f eh n s ih z eh n dh eh r w er sil g r ey z ih ng sil k ae dx l sil
|
||||
|
||||
|
||||
2021-11-08 21:02:54,387 INFO [pretrained.py:269] Decoding Done
|
||||
|
||||
|
||||
To decode with the ``whole-lattice-rescoring`` method, you can use
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn_lstm_ctc/pretrained.py \
|
||||
--method whole-lattice-rescoring \
|
||||
--checkpoint ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/exp/pretrained_average_16_25.pt \
|
||||
--words-file ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/words.txt \
|
||||
--HLG ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/HLG.pt \
|
||||
--G ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lm/G_4_gram.pt \
|
||||
--ngram-lm-scale 0.08 \
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV
|
||||
|
||||
The decoding output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-11-08 20:05:22,739 INFO [pretrained.py:169] device: cuda:0
|
||||
2021-11-08 20:05:22,739 INFO [pretrained.py:171] Creating model
|
||||
2021-11-08 20:05:26,959 INFO [pretrained.py:183] Loading HLG from ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lang_phone/HLG.pt
|
||||
2021-11-08 20:05:26,971 INFO [pretrained.py:191] Loading G from ./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/data/lm/G_4_gram.pt
|
||||
2021-11-08 20:05:26,977 INFO [pretrained.py:200] Constructing Fbank computer
|
||||
2021-11-08 20:05:26,978 INFO [pretrained.py:210] Reading sound files: ['./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV', './tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV', './tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV']
|
||||
2021-11-08 20:05:26,981 INFO [pretrained.py:216] Decoding started
|
||||
2021-11-08 20:05:27,519 INFO [pretrained.py:251] Use HLG decoding + LM rescoring
|
||||
2021-11-08 20:05:27,878 INFO [pretrained.py:267]
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FDHC0_SI1559.WAV:
|
||||
sil dh ih sh uw l iy v iy z ih sil p r aa sil k s ah m ey dx ih sil w uh dx iy w ih s f iy l ih ng w ih th ih n ih m s eh l f sil jh
|
||||
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FELC0_SI756.WAV:
|
||||
sil dh ih sil t ih r iy ih s sil s er r eh m ih sil n ah l ih ng sil k l ey sil r eh sil d w ay sil d aa r sil b ow f sil jh
|
||||
|
||||
./tmp-lstm/icefall_asr_timit_tdnn_lstm_ctc/test_waves/FMGD0_SI1564.WAV:
|
||||
sil hh ah z sil b ih n iy w ah z sil b ae n ih sil b ay s sil n ey sil k ih l f eh n s ih z eh n dh eh r w er sil g r ey z ih n sil k ae dx l sil
|
||||
|
||||
|
||||
2021-11-08 20:05:27,878 INFO [pretrained.py:269] Decoding Done
|
||||
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
We provide a colab notebook for decoding with a pre-trained model.
|
||||
|
||||
|timit tdnn_lstm_ctc colab notebook|
|
||||
|
||||
.. |timit tdnn_lstm_ctc colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/1Hs9DA4V96uapw_30uNp32OMJgkuR5VVd
|
||||
|
||||
|
||||
**Congratulations!** You have finished the TDNN-LSTM-CTC recipe on timit in ``icefall``.
|
7
_sources/recipes/yesno/index.rst.txt
Normal file
@ -0,0 +1,7 @@
|
||||
YesNo
|
||||
=====
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
tdnn
|
445
_sources/recipes/yesno/tdnn.rst.txt
Normal file
@ -0,0 +1,445 @@
|
||||
TDNN-CTC
|
||||
========
|
||||
|
||||
This page shows you how to run the `yesno <https://www.openslr.org/1>`_ recipe. It contains:
|
||||
|
||||
- (1) Prepare data for training
|
||||
- (2) Train a TDNN model
|
||||
|
||||
- (a) View text format logs and visualize TensorBoard logs
|
||||
- (b) Select device type, i.e., CPU and GPU, for training
|
||||
- (c) Change training options
|
||||
- (d) Resume training from a checkpoint
|
||||
|
||||
- (3) Decode with a trained model
|
||||
|
||||
- (a) Select a checkpoint for decoding
|
||||
- (b) Model averaging
|
||||
|
||||
- (4) Colab notebook
|
||||
|
||||
- (a) It shows you step by step how to setup the environment, how to do training,
|
||||
and how to do decoding
|
||||
- (b) How to use a pre-trained model
|
||||
|
||||
- (5) Inference with a pre-trained model
|
||||
|
||||
- (a) Download a pre-trained model, provided by us
|
||||
- (b) Decode a single sound file with a pre-trained model
|
||||
- (c) Decode multiple sound files at the same time
|
||||
|
||||
It does **NOT** show you:
|
||||
|
||||
- (1) How to train with multiple GPUs
|
||||
|
||||
The ``yesno`` dataset is so small that CPU is more than enough
|
||||
for training as well as for decoding.
|
||||
|
||||
- (2) How to use LM rescoring for decoding
|
||||
|
||||
The dataset does not have an LM for rescoring.
|
||||
|
||||
.. HINT::
|
||||
|
||||
We assume you have read the page :ref:`install icefall` and have setup
|
||||
the environment for ``icefall``.
|
||||
|
||||
.. HINT::
|
||||
|
||||
You **don't** need a **GPU** to run this recipe. It can be run on a **CPU**.
|
||||
The training part takes less than 30 **seconds** on a CPU and you will get
|
||||
the following WER at the end::
|
||||
|
||||
[test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
|
||||
|
||||
Data preparation
|
||||
----------------
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/yesno/ASR
|
||||
$ ./prepare.sh
|
||||
|
||||
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
|
||||
All you need to do is to run it.
|
||||
|
||||
The data preparation contains several stages, you can use the following two
|
||||
options:
|
||||
|
||||
- ``--stage``
|
||||
- ``--stop-stage``
|
||||
|
||||
to control which stage(s) should be run. By default, all stages are executed.
|
||||
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/yesno/ASR
|
||||
$ ./prepare.sh --stage 0 --stop-stage 0
|
||||
|
||||
means to run only stage 0.
|
||||
|
||||
To run stage 2 to stage 5, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./prepare.sh --stage 2 --stop-stage 5
|
||||
|
||||
|
||||
Training
|
||||
--------
|
||||
|
||||
We provide only a TDNN model, contained in
|
||||
the `tdnn <https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn>`_
|
||||
folder, for ``yesno``.
|
||||
|
||||
The command to run the training part is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/yesno/ASR
|
||||
$ export CUDA_VISIBLE_DEVICES=""
|
||||
$ ./tdnn/train.py
|
||||
|
||||
By default, it will run ``15`` epochs. Training logs and checkpoints are saved
|
||||
in ``tdnn/exp``.
|
||||
|
||||
In ``tdnn/exp``, you will find the following files:
|
||||
|
||||
- ``epoch-0.pt``, ``epoch-1.pt``, ...
|
||||
|
||||
These are checkpoint files, containing model ``state_dict`` and optimizer ``state_dict``.
|
||||
To resume training from some checkpoint, say ``epoch-10.pt``, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn/train.py --start-epoch 11
|
||||
|
||||
- ``tensorboard/``
|
||||
|
||||
This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd tdnn/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --description "TDNN training for yesno with icefall"
|
||||
|
||||
It will print something like below:
|
||||
|
||||
.. code-block::
|
||||
|
||||
TensorFlow installation not found - running with reduced feature set.
|
||||
Upload started and will continue reading any new data as it's added to the logdir.
|
||||
|
||||
To stop uploading, press Ctrl-C.
|
||||
|
||||
New experiment created. View your TensorBoard at: https://tensorboard.dev/experiment/yKUbhb5wRmOSXYkId1z9eg/
|
||||
|
||||
[2021-08-23T23:49:41] Started scanning logdir.
|
||||
[2021-08-23T23:49:42] Total uploaded: 135 scalars, 0 tensors, 0 binary objects
|
||||
Listening for new data in logdir...
|
||||
|
||||
Note there is a URL in the above output, click it and you will see
|
||||
the following screenshot:
|
||||
|
||||
.. figure:: images/tdnn-tensorboard-log.png
|
||||
:width: 600
|
||||
:alt: TensorBoard screenshot
|
||||
:align: center
|
||||
:target: https://tensorboard.dev/experiment/yKUbhb5wRmOSXYkId1z9eg/
|
||||
|
||||
TensorBoard screenshot.
|
||||
|
||||
- ``log/log-train-xxxx``
|
||||
|
||||
It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.
|
||||
|
||||
|
||||
|
||||
.. NOTE::
|
||||
|
||||
By default, ``./tdnn/train.py`` uses GPU 0 for training if GPUs are available.
|
||||
If you have two GPUs, say, GPU 0 and GPU 1, and you want to use GPU 1 for
|
||||
training, you can run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES="1"
|
||||
$ ./tdnn/train.py
|
||||
|
||||
Since the ``yesno`` dataset is very small, containing only 30 sound files
|
||||
for training, and the model in use is also very small, we use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES=""
|
||||
|
||||
so that ``./tdnn/train.py`` uses CPU during training.
|
||||
|
||||
If you don't have GPUs, then you don't need to
|
||||
run ``export CUDA_VISIBLE_DEVICES=""``.
|
||||
|
||||
To see available training options, you can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn/train.py --help
|
||||
|
||||
Other training options, e.g., learning rate, results dir, etc., are
|
||||
pre-configured in the function ``get_params()``
|
||||
in `tdnn/train.py <https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/tdnn/train.py>`_.
|
||||
Normally, you don't need to change them. You can change them by modifying the code, if
|
||||
you want.
|
||||
|
||||
Decoding
|
||||
--------
|
||||
|
||||
The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.
|
||||
|
||||
The command for decoding is:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ export CUDA_VISIBLE_DEVICES=""
|
||||
$ ./tdnn/decode.py
|
||||
|
||||
You will see the WER in the output log.
|
||||
|
||||
Decoded results are saved in ``tdnn/exp``.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn/decode.py --help
|
||||
|
||||
shows you the available decoding options.
|
||||
|
||||
Some commonly used options are:
|
||||
|
||||
- ``--epoch``
|
||||
|
||||
You can select which checkpoint to be used for decoding.
|
||||
For instance, ``./tdnn/decode.py --epoch 10`` means to use
|
||||
``./tdnn/exp/epoch-10.pt`` for decoding.
|
||||
|
||||
- ``--avg``
|
||||
|
||||
It's related to model averaging. It specifies number of checkpoints
|
||||
to be averaged. The averaged model is used for decoding.
|
||||
For example, the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ ./tdnn/decode.py --epoch 10 --avg 3
|
||||
|
||||
uses the average of ``epoch-8.pt``, ``epoch-9.pt`` and ``epoch-10.pt``
|
||||
for decoding.
|
||||
|
||||
- ``--export``
|
||||
|
||||
If it is ``True``, i.e., ``./tdnn/decode.py --export 1``, the code
|
||||
will save the averaged model to ``tdnn/exp/pretrained.pt``.
|
||||
See :ref:`yesno use a pre-trained model` for how to use it.
|
||||
|
||||
|
||||
.. _yesno use a pre-trained model:
|
||||
|
||||
Pre-trained Model
|
||||
-----------------
|
||||
|
||||
We have uploaded the pre-trained model to
|
||||
`<https://huggingface.co/csukuangfj/icefall_asr_yesno_tdnn>`_.
|
||||
|
||||
The following shows you how to use the pre-trained model.
|
||||
|
||||
Download the pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/yesno/ASR
|
||||
$ mkdir tmp
|
||||
$ cd tmp
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/csukuangfj/icefall_asr_yesno_tdnn
|
||||
|
||||
.. CAUTION::
|
||||
|
||||
You have to use ``git lfs`` to download the pre-trained model.
|
||||
|
||||
After downloading, you will have the following files:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/yesno/ASR
|
||||
$ tree tmp
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
tmp/
|
||||
`-- icefall_asr_yesno_tdnn
|
||||
|-- README.md
|
||||
|-- lang_phone
|
||||
| |-- HLG.pt
|
||||
| |-- L.pt
|
||||
| |-- L_disambig.pt
|
||||
| |-- Linv.pt
|
||||
| |-- lexicon.txt
|
||||
| |-- lexicon_disambig.txt
|
||||
| |-- tokens.txt
|
||||
| `-- words.txt
|
||||
|-- lm
|
||||
| |-- G.arpa
|
||||
| `-- G.fst.txt
|
||||
|-- pretrained.pt
|
||||
`-- test_waves
|
||||
|-- 0_0_0_1_0_0_0_1.wav
|
||||
|-- 0_0_1_0_0_0_1_0.wav
|
||||
|-- 0_0_1_0_0_1_1_1.wav
|
||||
|-- 0_0_1_0_1_0_0_1.wav
|
||||
|-- 0_0_1_1_0_0_0_1.wav
|
||||
|-- 0_0_1_1_0_1_1_0.wav
|
||||
|-- 0_0_1_1_1_0_0_0.wav
|
||||
|-- 0_0_1_1_1_1_0_0.wav
|
||||
|-- 0_1_0_0_0_1_0_0.wav
|
||||
|-- 0_1_0_0_1_0_1_0.wav
|
||||
|-- 0_1_0_1_0_0_0_0.wav
|
||||
|-- 0_1_0_1_1_1_0_0.wav
|
||||
|-- 0_1_1_0_0_1_1_1.wav
|
||||
|-- 0_1_1_1_0_0_1_0.wav
|
||||
|-- 0_1_1_1_1_0_1_0.wav
|
||||
|-- 1_0_0_0_0_0_0_0.wav
|
||||
|-- 1_0_0_0_0_0_1_1.wav
|
||||
|-- 1_0_0_1_0_1_1_1.wav
|
||||
|-- 1_0_1_1_0_1_1_1.wav
|
||||
|-- 1_0_1_1_1_1_0_1.wav
|
||||
|-- 1_1_0_0_0_1_1_1.wav
|
||||
|-- 1_1_0_0_1_0_1_1.wav
|
||||
|-- 1_1_0_1_0_1_0_0.wav
|
||||
|-- 1_1_0_1_1_0_0_1.wav
|
||||
|-- 1_1_0_1_1_1_1_0.wav
|
||||
|-- 1_1_1_0_0_1_0_1.wav
|
||||
|-- 1_1_1_0_1_0_1_0.wav
|
||||
|-- 1_1_1_1_0_0_1_0.wav
|
||||
|-- 1_1_1_1_1_0_0_0.wav
|
||||
`-- 1_1_1_1_1_1_1_1.wav
|
||||
|
||||
4 directories, 42 files
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ soxi tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav
|
||||
|
||||
Input File : 'tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav'
|
||||
Channels : 1
|
||||
Sample Rate : 8000
|
||||
Precision : 16-bit
|
||||
Duration : 00:00:06.76 = 54080 samples ~ 507 CDDA sectors
|
||||
File Size : 108k
|
||||
Bit Rate : 128k
|
||||
Sample Encoding: 16-bit Signed Integer PCM
|
||||
|
||||
- ``0_0_1_0_1_0_0_1.wav``
|
||||
|
||||
0 means No; 1 means Yes. No and Yes are not in English,
|
||||
but in `Hebrew <https://en.wikipedia.org/wiki/Hebrew_language>`_.
|
||||
So this file contains ``NO NO YES NO YES NO NO YES``.
|
||||
|
||||
Download kaldifeat
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
`kaldifeat <https://github.com/csukuangfj/kaldifeat>`_ is used for extracting
|
||||
features from a single or multiple sound files. Please refer to
|
||||
`<https://github.com/csukuangfj/kaldifeat>`_ to install ``kaldifeat`` first.
|
||||
|
||||
Inference with a pre-trained model
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ cd egs/yesno/ASR
|
||||
$ ./tdnn/pretrained.py --help
|
||||
|
||||
shows the usage information of ``./tdnn/pretrained.py``.
|
||||
|
||||
To decode a single file, we can use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn/pretrained.py \
|
||||
--checkpoint ./tmp/icefall_asr_yesno_tdnn/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_yesno_tdnn/lang_phone/words.txt \
|
||||
--HLG ./tmp/icefall_asr_yesno_tdnn/lang_phone/HLG.pt \
|
||||
./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav
|
||||
|
||||
The output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-24 12:22:51,621 INFO [pretrained.py:119] {'feature_dim': 23, 'num_classes': 4, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'checkpoint': './tmp/icefall_asr_yesno_tdnn/pretrained.pt', 'words_file': './tmp/icefall_asr_yesno_tdnn/lang_phone/words.txt', 'HLG': './tmp/icefall_asr_yesno_tdnn/lang_phone/HLG.pt', 'sound_files': ['./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav']}
|
||||
2021-08-24 12:22:51,645 INFO [pretrained.py:125] device: cpu
|
||||
2021-08-24 12:22:51,645 INFO [pretrained.py:127] Creating model
|
||||
2021-08-24 12:22:51,650 INFO [pretrained.py:139] Loading HLG from ./tmp/icefall_asr_yesno_tdnn/lang_phone/HLG.pt
|
||||
2021-08-24 12:22:51,651 INFO [pretrained.py:143] Constructing Fbank computer
|
||||
2021-08-24 12:22:51,652 INFO [pretrained.py:153] Reading sound files: ['./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav']
|
||||
2021-08-24 12:22:51,684 INFO [pretrained.py:159] Decoding started
|
||||
2021-08-24 12:22:51,708 INFO [pretrained.py:198]
|
||||
./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav:
|
||||
NO NO YES NO YES NO NO YES
|
||||
|
||||
|
||||
2021-08-24 12:22:51,708 INFO [pretrained.py:200] Decoding Done
|
||||
|
||||
You can see that for the sound file ``0_0_1_0_1_0_0_1.wav``, the decoding result is
|
||||
``NO NO YES NO YES NO NO YES``.
|
||||
|
||||
To decode **multiple** files at the same time, you can use
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./tdnn/pretrained.py \
|
||||
--checkpoint ./tmp/icefall_asr_yesno_tdnn/pretrained.pt \
|
||||
--words-file ./tmp/icefall_asr_yesno_tdnn/lang_phone/words.txt \
|
||||
--HLG ./tmp/icefall_asr_yesno_tdnn/lang_phone/HLG.pt \
|
||||
./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav \
|
||||
./tmp/icefall_asr_yesno_tdnn/test_waves/1_0_1_1_0_1_1_1.wav
|
||||
|
||||
The decoding output is:
|
||||
|
||||
.. code-block::
|
||||
|
||||
2021-08-24 12:25:20,159 INFO [pretrained.py:119] {'feature_dim': 23, 'num_classes': 4, 'sample_rate': 8000, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'checkpoint': './tmp/icefall_asr_yesno_tdnn/pretrained.pt', 'words_file': './tmp/icefall_asr_yesno_tdnn/lang_phone/words.txt', 'HLG': './tmp/icefall_asr_yesno_tdnn/lang_phone/HLG.pt', 'sound_files': ['./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav', './tmp/icefall_asr_yesno_tdnn/test_waves/1_0_1_1_0_1_1_1.wav']}
|
||||
2021-08-24 12:25:20,181 INFO [pretrained.py:125] device: cpu
|
||||
2021-08-24 12:25:20,181 INFO [pretrained.py:127] Creating model
|
||||
2021-08-24 12:25:20,185 INFO [pretrained.py:139] Loading HLG from ./tmp/icefall_asr_yesno_tdnn/lang_phone/HLG.pt
|
||||
2021-08-24 12:25:20,186 INFO [pretrained.py:143] Constructing Fbank computer
|
||||
2021-08-24 12:25:20,187 INFO [pretrained.py:153] Reading sound files: ['./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav',
|
||||
'./tmp/icefall_asr_yesno_tdnn/test_waves/1_0_1_1_0_1_1_1.wav']
|
||||
2021-08-24 12:25:20,213 INFO [pretrained.py:159] Decoding started
|
||||
2021-08-24 12:25:20,287 INFO [pretrained.py:198]
|
||||
./tmp/icefall_asr_yesno_tdnn/test_waves/0_0_1_0_1_0_0_1.wav:
|
||||
NO NO YES NO YES NO NO YES
|
||||
|
||||
./tmp/icefall_asr_yesno_tdnn/test_waves/1_0_1_1_0_1_1_1.wav:
|
||||
YES NO YES YES NO YES YES YES
|
||||
|
||||
2021-08-24 12:25:20,287 INFO [pretrained.py:200] Decoding Done
|
||||
|
||||
You can see again that it decodes correctly.
|
||||
|
||||
Colab notebook
|
||||
--------------
|
||||
|
||||
We do provide a colab notebook for this recipe.
|
||||
|
||||
|yesno colab notebook|
|
||||
|
||||
.. |yesno colab notebook| image:: https://colab.research.google.com/assets/colab-badge.svg
|
||||
:target: https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing
|
||||
|
||||
|
||||
**Congratulations!** You have finished the simplest speech recognition recipe in ``icefall``.
|
4
_static/README.md
Normal file
@ -0,0 +1,4 @@
|
||||
|
||||
# Introduction
|
||||
|
||||
<https://shields.io/> is used to generate files in this directory.
|
134
_static/_sphinx_javascript_frameworks_compat.js
Normal file
@ -0,0 +1,134 @@
|
||||
/*
|
||||
* _sphinx_javascript_frameworks_compat.js
|
||||
* ~~~~~~~~~~
|
||||
*
|
||||
* Compatability shim for jQuery and underscores.js.
|
||||
*
|
||||
* WILL BE REMOVED IN Sphinx 6.0
|
||||
* xref RemovedInSphinx60Warning
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* select a different prefix for underscore
|
||||
*/
|
||||
$u = _.noConflict();
|
||||
|
||||
|
||||
/**
|
||||
* small helper function to urldecode strings
|
||||
*
|
||||
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
|
||||
*/
|
||||
jQuery.urldecode = function(x) {
|
||||
if (!x) {
|
||||
return x
|
||||
}
|
||||
return decodeURIComponent(x.replace(/\+/g, ' '));
|
||||
};
|
||||
|
||||
/**
|
||||
* small helper function to urlencode strings
|
||||
*/
|
||||
jQuery.urlencode = encodeURIComponent;
|
||||
|
||||
/**
|
||||
* This function returns the parsed url parameters of the
|
||||
* current request. Multiple values per key are supported,
|
||||
* it will always return arrays of strings for the value parts.
|
||||
*/
|
||||
jQuery.getQueryParameters = function(s) {
|
||||
if (typeof s === 'undefined')
|
||||
s = document.location.search;
|
||||
var parts = s.substr(s.indexOf('?') + 1).split('&');
|
||||
var result = {};
|
||||
for (var i = 0; i < parts.length; i++) {
|
||||
var tmp = parts[i].split('=', 2);
|
||||
var key = jQuery.urldecode(tmp[0]);
|
||||
var value = jQuery.urldecode(tmp[1]);
|
||||
if (key in result)
|
||||
result[key].push(value);
|
||||
else
|
||||
result[key] = [value];
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
/**
|
||||
* highlight a given string on a jquery object by wrapping it in
|
||||
* span elements with the given class name.
|
||||
*/
|
||||
jQuery.fn.highlightText = function(text, className) {
|
||||
function highlight(node, addItems) {
|
||||
if (node.nodeType === 3) {
|
||||
var val = node.nodeValue;
|
||||
var pos = val.toLowerCase().indexOf(text);
|
||||
if (pos >= 0 &&
|
||||
!jQuery(node.parentNode).hasClass(className) &&
|
||||
!jQuery(node.parentNode).hasClass("nohighlight")) {
|
||||
var span;
|
||||
var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
|
||||
if (isInSVG) {
|
||||
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
|
||||
} else {
|
||||
span = document.createElement("span");
|
||||
span.className = className;
|
||||
}
|
||||
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
|
||||
node.parentNode.insertBefore(span, node.parentNode.insertBefore(
|
||||
document.createTextNode(val.substr(pos + text.length)),
|
||||
node.nextSibling));
|
||||
node.nodeValue = val.substr(0, pos);
|
||||
if (isInSVG) {
|
||||
var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
|
||||
var bbox = node.parentElement.getBBox();
|
||||
rect.x.baseVal.value = bbox.x;
|
||||
rect.y.baseVal.value = bbox.y;
|
||||
rect.width.baseVal.value = bbox.width;
|
||||
rect.height.baseVal.value = bbox.height;
|
||||
rect.setAttribute('class', className);
|
||||
addItems.push({
|
||||
"parent": node.parentNode,
|
||||
"target": rect});
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (!jQuery(node).is("button, select, textarea")) {
|
||||
jQuery.each(node.childNodes, function() {
|
||||
highlight(this, addItems);
|
||||
});
|
||||
}
|
||||
}
|
||||
var addItems = [];
|
||||
var result = this.each(function() {
|
||||
highlight(this, addItems);
|
||||
});
|
||||
for (var i = 0; i < addItems.length; ++i) {
|
||||
jQuery(addItems[i].parent).before(addItems[i].target);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
/*
|
||||
* backward compatibility for jQuery.browser
|
||||
* This will be supported until firefox bug is fixed.
|
||||
*/
|
||||
if (!jQuery.browser) {
|
||||
jQuery.uaMatch = function(ua) {
|
||||
ua = ua.toLowerCase();
|
||||
|
||||
var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
|
||||
/(webkit)[ \/]([\w.]+)/.exec(ua) ||
|
||||
/(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
|
||||
/(msie) ([\w.]+)/.exec(ua) ||
|
||||
ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
|
||||
[];
|
||||
|
||||
return {
|
||||
browser: match[ 1 ] || "",
|
||||
version: match[ 2 ] || "0"
|
||||
};
|
||||
};
|
||||
jQuery.browser = {};
|
||||
jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
|
||||
}
|
899
_static/basic.css
Normal file
@ -0,0 +1,899 @@
|
||||
/*
|
||||
* basic.css
|
||||
* ~~~~~~~~~
|
||||
*
|
||||
* Sphinx stylesheet -- basic theme.
|
||||
*
|
||||
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
|
||||
/* -- main layout ----------------------------------------------------------- */
|
||||
|
||||
div.clearer {
|
||||
clear: both;
|
||||
}
|
||||
|
||||
div.section::after {
|
||||
display: block;
|
||||
content: '';
|
||||
clear: left;
|
||||
}
|
||||
|
||||
/* -- relbar ---------------------------------------------------------------- */
|
||||
|
||||
div.related {
|
||||
width: 100%;
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
div.related h3 {
|
||||
display: none;
|
||||
}
|
||||
|
||||
div.related ul {
|
||||
margin: 0;
|
||||
padding: 0 0 0 10px;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
div.related li {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
div.related li.right {
|
||||
float: right;
|
||||
margin-right: 5px;
|
||||
}
|
||||
|
||||
/* -- sidebar --------------------------------------------------------------- */
|
||||
|
||||
div.sphinxsidebarwrapper {
|
||||
padding: 10px 5px 0 10px;
|
||||
}
|
||||
|
||||
div.sphinxsidebar {
|
||||
float: left;
|
||||
width: 230px;
|
||||
margin-left: -100%;
|
||||
font-size: 90%;
|
||||
word-wrap: break-word;
|
||||
overflow-wrap : break-word;
|
||||
}
|
||||
|
||||
div.sphinxsidebar ul {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
div.sphinxsidebar ul ul,
|
||||
div.sphinxsidebar ul.want-points {
|
||||
margin-left: 20px;
|
||||
list-style: square;
|
||||
}
|
||||
|
||||
div.sphinxsidebar ul ul {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
div.sphinxsidebar form {
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
div.sphinxsidebar input {
|
||||
border: 1px solid #98dbcc;
|
||||
font-family: sans-serif;
|
||||
font-size: 1em;
|
||||
}
|
||||
|
||||
div.sphinxsidebar #searchbox form.search {
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
div.sphinxsidebar #searchbox input[type="text"] {
|
||||
float: left;
|
||||
width: 80%;
|
||||
padding: 0.25em;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
div.sphinxsidebar #searchbox input[type="submit"] {
|
||||
float: left;
|
||||
width: 20%;
|
||||
border-left: none;
|
||||
padding: 0.25em;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
|
||||
img {
|
||||
border: 0;
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
/* -- search page ----------------------------------------------------------- */
|
||||
|
||||
ul.search {
|
||||
margin: 10px 0 0 20px;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ul.search li {
|
||||
padding: 5px 0 5px 20px;
|
||||
background-image: url(file.png);
|
||||
background-repeat: no-repeat;
|
||||
background-position: 0 7px;
|
||||
}
|
||||
|
||||
ul.search li a {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
ul.search li p.context {
|
||||
color: #888;
|
||||
margin: 2px 0 0 30px;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
ul.keywordmatches li.goodmatch a {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* -- index page ------------------------------------------------------------ */
|
||||
|
||||
table.contentstable {
|
||||
width: 90%;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table.contentstable p.biglink {
|
||||
line-height: 150%;
|
||||
}
|
||||
|
||||
a.biglink {
|
||||
font-size: 1.3em;
|
||||
}
|
||||
|
||||
span.linkdescr {
|
||||
font-style: italic;
|
||||
padding-top: 5px;
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
/* -- general index --------------------------------------------------------- */
|
||||
|
||||
table.indextable {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
table.indextable td {
|
||||
text-align: left;
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
table.indextable ul {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
table.indextable > tbody > tr > td > ul {
|
||||
padding-left: 0em;
|
||||
}
|
||||
|
||||
table.indextable tr.pcap {
|
||||
height: 10px;
|
||||
}
|
||||
|
||||
table.indextable tr.cap {
|
||||
margin-top: 10px;
|
||||
background-color: #f2f2f2;
|
||||
}
|
||||
|
||||
img.toggler {
|
||||
margin-right: 3px;
|
||||
margin-top: 3px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
div.modindex-jumpbox {
|
||||
border-top: 1px solid #ddd;
|
||||
border-bottom: 1px solid #ddd;
|
||||
margin: 1em 0 1em 0;
|
||||
padding: 0.4em;
|
||||
}
|
||||
|
||||
div.genindex-jumpbox {
|
||||
border-top: 1px solid #ddd;
|
||||
border-bottom: 1px solid #ddd;
|
||||
margin: 1em 0 1em 0;
|
||||
padding: 0.4em;
|
||||
}
|
||||
|
||||
/* -- domain module index --------------------------------------------------- */
|
||||
|
||||
table.modindextable td {
|
||||
padding: 2px;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
/* -- general body styles --------------------------------------------------- */
|
||||
|
||||
div.body {
|
||||
min-width: 360px;
|
||||
max-width: 800px;
|
||||
}
|
||||
|
||||
div.body p, div.body dd, div.body li, div.body blockquote {
|
||||
-moz-hyphens: auto;
|
||||
-ms-hyphens: auto;
|
||||
-webkit-hyphens: auto;
|
||||
hyphens: auto;
|
||||
}
|
||||
|
||||
a.headerlink {
|
||||
visibility: hidden;
|
||||
}
|
||||
a.brackets:before,
|
||||
span.brackets > a:before{
|
||||
content: "[";
|
||||
}
|
||||
|
||||
a.brackets:after,
|
||||
span.brackets > a:after {
|
||||
content: "]";
|
||||
}
|
||||
|
||||
|
||||
h1:hover > a.headerlink,
|
||||
h2:hover > a.headerlink,
|
||||
h3:hover > a.headerlink,
|
||||
h4:hover > a.headerlink,
|
||||
h5:hover > a.headerlink,
|
||||
h6:hover > a.headerlink,
|
||||
dt:hover > a.headerlink,
|
||||
caption:hover > a.headerlink,
|
||||
p.caption:hover > a.headerlink,
|
||||
div.code-block-caption:hover > a.headerlink {
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
div.body p.caption {
|
||||
text-align: inherit;
|
||||
}
|
||||
|
||||
div.body td {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.first {
|
||||
margin-top: 0 !important;
|
||||
}
|
||||
|
||||
p.rubric {
|
||||
margin-top: 30px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
img.align-left, figure.align-left, .figure.align-left, object.align-left {
|
||||
clear: left;
|
||||
float: left;
|
||||
margin-right: 1em;
|
||||
}
|
||||
|
||||
img.align-right, figure.align-right, .figure.align-right, object.align-right {
|
||||
clear: right;
|
||||
float: right;
|
||||
margin-left: 1em;
|
||||
}
|
||||
|
||||
img.align-center, figure.align-center, .figure.align-center, object.align-center {
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
img.align-default, figure.align-default, .figure.align-default {
|
||||
display: block;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
.align-left {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.align-center {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.align-default {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.align-right {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
/* -- sidebars -------------------------------------------------------------- */
|
||||
|
||||
div.sidebar,
|
||||
aside.sidebar {
|
||||
margin: 0 0 0.5em 1em;
|
||||
border: 1px solid #ddb;
|
||||
padding: 7px;
|
||||
background-color: #ffe;
|
||||
width: 40%;
|
||||
float: right;
|
||||
clear: right;
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
p.sidebar-title {
|
||||
font-weight: bold;
|
||||
}
|
||||
div.admonition, div.topic, blockquote {
|
||||
clear: left;
|
||||
}
|
||||
|
||||
/* -- topics ---------------------------------------------------------------- */
|
||||
div.topic {
|
||||
border: 1px solid #ccc;
|
||||
padding: 7px;
|
||||
margin: 10px 0 10px 0;
|
||||
}
|
||||
|
||||
p.topic-title {
|
||||
font-size: 1.1em;
|
||||
font-weight: bold;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
/* -- admonitions ----------------------------------------------------------- */
|
||||
|
||||
div.admonition {
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
padding: 7px;
|
||||
}
|
||||
|
||||
div.admonition dt {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
p.admonition-title {
|
||||
margin: 0px 10px 5px 0px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
div.body p.centered {
|
||||
text-align: center;
|
||||
margin-top: 25px;
|
||||
}
|
||||
|
||||
/* -- content of sidebars/topics/admonitions -------------------------------- */
|
||||
|
||||
div.sidebar > :last-child,
|
||||
aside.sidebar > :last-child,
|
||||
div.topic > :last-child,
|
||||
div.admonition > :last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
div.sidebar::after,
|
||||
aside.sidebar::after,
|
||||
div.topic::after,
|
||||
div.admonition::after,
|
||||
blockquote::after {
|
||||
display: block;
|
||||
content: '';
|
||||
clear: both;
|
||||
}
|
||||
|
||||
/* -- tables ---------------------------------------------------------------- */
|
||||
|
||||
table.docutils {
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
border: 0;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
table.align-center {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table.align-default {
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
table caption span.caption-number {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
table caption span.caption-text {
|
||||
}
|
||||
|
||||
table.docutils td, table.docutils th {
|
||||
padding: 1px 8px 1px 5px;
|
||||
border-top: 0;
|
||||
border-left: 0;
|
||||
border-right: 0;
|
||||
border-bottom: 1px solid #aaa;
|
||||
}
|
||||
|
||||
th {
|
||||
text-align: left;
|
||||
padding-right: 5px;
|
||||
}
|
||||
|
||||
table.citation {
|
||||
border-left: solid 1px gray;
|
||||
margin-left: 1px;
|
||||
}
|
||||
|
||||
table.citation td {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
th > :first-child,
|
||||
td > :first-child {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
th > :last-child,
|
||||
td > :last-child {
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
/* -- figures --------------------------------------------------------------- */
|
||||
|
||||
div.figure, figure {
|
||||
margin: 0.5em;
|
||||
padding: 0.5em;
|
||||
}
|
||||
|
||||
div.figure p.caption, figcaption {
|
||||
padding: 0.3em;
|
||||
}
|
||||
|
||||
div.figure p.caption span.caption-number,
|
||||
figcaption span.caption-number {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
div.figure p.caption span.caption-text,
|
||||
figcaption span.caption-text {
|
||||
}
|
||||
|
||||
/* -- field list styles ----------------------------------------------------- */
|
||||
|
||||
table.field-list td, table.field-list th {
|
||||
border: 0 !important;
|
||||
}
|
||||
|
||||
.field-list ul {
|
||||
margin: 0;
|
||||
padding-left: 1em;
|
||||
}
|
||||
|
||||
.field-list p {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.field-name {
|
||||
-moz-hyphens: manual;
|
||||
-ms-hyphens: manual;
|
||||
-webkit-hyphens: manual;
|
||||
hyphens: manual;
|
||||
}
|
||||
|
||||
/* -- hlist styles ---------------------------------------------------------- */
|
||||
|
||||
table.hlist {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
table.hlist td {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
/* -- object description styles --------------------------------------------- */
|
||||
|
||||
.sig {
|
||||
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
|
||||
}
|
||||
|
||||
.sig-name, code.descname {
|
||||
background-color: transparent;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.sig-name {
|
||||
font-size: 1.1em;
|
||||
}
|
||||
|
||||
code.descname {
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
||||
.sig-prename, code.descclassname {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.optional {
|
||||
font-size: 1.3em;
|
||||
}
|
||||
|
||||
.sig-paren {
|
||||
font-size: larger;
|
||||
}
|
||||
|
||||
.sig-param.n {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* C++ specific styling */
|
||||
|
||||
.sig-inline.c-texpr,
|
||||
.sig-inline.cpp-texpr {
|
||||
font-family: unset;
|
||||
}
|
||||
|
||||
.sig.c .k, .sig.c .kt,
|
||||
.sig.cpp .k, .sig.cpp .kt {
|
||||
color: #0033B3;
|
||||
}
|
||||
|
||||
.sig.c .m,
|
||||
.sig.cpp .m {
|
||||
color: #1750EB;
|
||||
}
|
||||
|
||||
.sig.c .s, .sig.c .sc,
|
||||
.sig.cpp .s, .sig.cpp .sc {
|
||||
color: #067D17;
|
||||
}
|
||||
|
||||
|
||||
/* -- other body styles ----------------------------------------------------- */
|
||||
|
||||
ol.arabic {
|
||||
list-style: decimal;
|
||||
}
|
||||
|
||||
ol.loweralpha {
|
||||
list-style: lower-alpha;
|
||||
}
|
||||
|
||||
ol.upperalpha {
|
||||
list-style: upper-alpha;
|
||||
}
|
||||
|
||||
ol.lowerroman {
|
||||
list-style: lower-roman;
|
||||
}
|
||||
|
||||
ol.upperroman {
|
||||
list-style: upper-roman;
|
||||
}
|
||||
|
||||
:not(li) > ol > li:first-child > :first-child,
|
||||
:not(li) > ul > li:first-child > :first-child {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
:not(li) > ol > li:last-child > :last-child,
|
||||
:not(li) > ul > li:last-child > :last-child {
|
||||
margin-bottom: 0px;
|
||||
}
|
||||
|
||||
ol.simple ol p,
|
||||
ol.simple ul p,
|
||||
ul.simple ol p,
|
||||
ul.simple ul p {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
ol.simple > li:not(:first-child) > p,
|
||||
ul.simple > li:not(:first-child) > p {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
ol.simple p,
|
||||
ul.simple p {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
dl.footnote > dt,
|
||||
dl.citation > dt {
|
||||
float: left;
|
||||
margin-right: 0.5em;
|
||||
}
|
||||
|
||||
dl.footnote > dd,
|
||||
dl.citation > dd {
|
||||
margin-bottom: 0em;
|
||||
}
|
||||
|
||||
dl.footnote > dd:after,
|
||||
dl.citation > dd:after {
|
||||
content: "";
|
||||
clear: both;
|
||||
}
|
||||
|
||||
dl.field-list {
|
||||
display: grid;
|
||||
grid-template-columns: fit-content(30%) auto;
|
||||
}
|
||||
|
||||
dl.field-list > dt {
|
||||
font-weight: bold;
|
||||
word-break: break-word;
|
||||
padding-left: 0.5em;
|
||||
padding-right: 5px;
|
||||
}
|
||||
dl.field-list > dt:after {
|
||||
content: ":";
|
||||
}
|
||||
|
||||
|
||||
dl.field-list > dd {
|
||||
padding-left: 0.5em;
|
||||
margin-top: 0em;
|
||||
margin-left: 0em;
|
||||
margin-bottom: 0em;
|
||||
}
|
||||
|
||||
dl {
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
|
||||
dd > :first-child {
|
||||
margin-top: 0px;
|
||||
}
|
||||
|
||||
dd ul, dd table {
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
|
||||
dd {
|
||||
margin-top: 3px;
|
||||
margin-bottom: 10px;
|
||||
margin-left: 30px;
|
||||
}
|
||||
|
||||
dl > dd:last-child,
|
||||
dl > dd:last-child > :last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
dt:target, span.highlighted {
|
||||
background-color: #fbe54e;
|
||||
}
|
||||
|
||||
rect.highlighted {
|
||||
fill: #fbe54e;
|
||||
}
|
||||
|
||||
dl.glossary dt {
|
||||
font-weight: bold;
|
||||
font-size: 1.1em;
|
||||
}
|
||||
|
||||
.versionmodified {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
.system-message {
|
||||
background-color: #fda;
|
||||
padding: 5px;
|
||||
border: 3px solid red;
|
||||
}
|
||||
|
||||
.footnote:target {
|
||||
background-color: #ffa;
|
||||
}
|
||||
|
||||
.line-block {
|
||||
display: block;
|
||||
margin-top: 1em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
|
||||
.line-block .line-block {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
margin-left: 1.5em;
|
||||
}
|
||||
|
||||
.guilabel, .menuselection {
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
.accelerator {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.classifier {
|
||||
font-style: oblique;
|
||||
}
|
||||
|
||||
.classifier:before {
|
||||
font-style: normal;
|
||||
margin: 0 0.5em;
|
||||
content: ":";
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
abbr, acronym {
|
||||
border-bottom: dotted 1px;
|
||||
cursor: help;
|
||||
}
|
||||
|
||||
/* -- code displays --------------------------------------------------------- */
|
||||
|
||||
pre {
|
||||
overflow: auto;
|
||||
overflow-y: hidden; /* fixes display issues on Chrome browsers */
|
||||
}
|
||||
|
||||
pre, div[class*="highlight-"] {
|
||||
clear: both;
|
||||
}
|
||||
|
||||
span.pre {
|
||||
-moz-hyphens: none;
|
||||
-ms-hyphens: none;
|
||||
-webkit-hyphens: none;
|
||||
hyphens: none;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
div[class*="highlight-"] {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
td.linenos pre {
|
||||
border: 0;
|
||||
background-color: transparent;
|
||||
color: #aaa;
|
||||
}
|
||||
|
||||
table.highlighttable {
|
||||
display: block;
|
||||
}
|
||||
|
||||
table.highlighttable tbody {
|
||||
display: block;
|
||||
}
|
||||
|
||||
table.highlighttable tr {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
table.highlighttable td {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
table.highlighttable td.linenos {
|
||||
padding-right: 0.5em;
|
||||
}
|
||||
|
||||
table.highlighttable td.code {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.highlight .hll {
|
||||
display: block;
|
||||
}
|
||||
|
||||
div.highlight pre,
|
||||
table.highlighttable pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
div.code-block-caption + div {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
div.code-block-caption {
|
||||
margin-top: 1em;
|
||||
padding: 2px 5px;
|
||||
font-size: small;
|
||||
}
|
||||
|
||||
div.code-block-caption code {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
table.highlighttable td.linenos,
|
||||
span.linenos,
|
||||
div.highlight span.gp { /* gp: Generic.Prompt */
|
||||
user-select: none;
|
||||
-webkit-user-select: text; /* Safari fallback only */
|
||||
-webkit-user-select: none; /* Chrome/Safari */
|
||||
-moz-user-select: none; /* Firefox */
|
||||
-ms-user-select: none; /* IE10+ */
|
||||
}
|
||||
|
||||
div.code-block-caption span.caption-number {
|
||||
padding: 0.1em 0.3em;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
div.code-block-caption span.caption-text {
|
||||
}
|
||||
|
||||
div.literal-block-wrapper {
|
||||
margin: 1em 0;
|
||||
}
|
||||
|
||||
code.xref, a code {
|
||||
background-color: transparent;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.viewcode-link {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.viewcode-back {
|
||||
float: right;
|
||||
font-family: sans-serif;
|
||||
}
|
||||
|
||||
div.viewcode-block:target {
|
||||
margin: -1px -10px;
|
||||
padding: 0 10px;
|
||||
}
|
||||
|
||||
/* -- math display ---------------------------------------------------------- */
|
||||
|
||||
img.math {
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
div.body div.math p {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
span.eqno {
|
||||
float: right;
|
||||
}
|
||||
|
||||
span.eqno a.headerlink {
|
||||
position: absolute;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
div.math:hover a.headerlink {
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
/* -- printout stylesheet --------------------------------------------------- */
|
||||
|
||||
@media print {
|
||||
div.document,
|
||||
div.documentwrapper,
|
||||
div.bodywrapper {
|
||||
margin: 0 !important;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
div.sphinxsidebar,
|
||||
div.related,
|
||||
div.footer,
|
||||
#top-link {
|
||||
display: none;
|
||||
}
|
||||
}
|
1
_static/css/badge_only.css
Normal file
@ -0,0 +1 @@
|
||||
.fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}
|
BIN
_static/css/fonts/Roboto-Slab-Bold.woff
Normal file
BIN
_static/css/fonts/Roboto-Slab-Bold.woff2
Normal file
BIN
_static/css/fonts/Roboto-Slab-Regular.woff
Normal file
BIN
_static/css/fonts/Roboto-Slab-Regular.woff2
Normal file
BIN
_static/css/fonts/fontawesome-webfont.eot
Normal file
2671
_static/css/fonts/fontawesome-webfont.svg
Normal file
After Width: | Height: | Size: 434 KiB |
BIN
_static/css/fonts/fontawesome-webfont.ttf
Normal file
BIN
_static/css/fonts/fontawesome-webfont.woff
Normal file
BIN
_static/css/fonts/fontawesome-webfont.woff2
Normal file
BIN
_static/css/fonts/lato-bold-italic.woff
Normal file
BIN
_static/css/fonts/lato-bold-italic.woff2
Normal file
BIN
_static/css/fonts/lato-bold.woff
Normal file
BIN
_static/css/fonts/lato-bold.woff2
Normal file
BIN
_static/css/fonts/lato-normal-italic.woff
Normal file
BIN
_static/css/fonts/lato-normal-italic.woff2
Normal file
BIN
_static/css/fonts/lato-normal.woff
Normal file
BIN
_static/css/fonts/lato-normal.woff2
Normal file
4
_static/css/theme.css
Normal file
1
_static/device-CPU_CUDA-orange.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="122" height="20" role="img" aria-label="device: CPU | CUDA"><title>device: CPU | CUDA</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="122" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="45" height="20" fill="#555"/><rect x="45" width="77" height="20" fill="#fe7d37"/><rect width="122" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="235" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="350">device</text><text x="235" y="140" transform="scale(.1)" fill="#fff" textLength="350">device</text><text aria-hidden="true" x="825" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="670">CPU | CUDA</text><text x="825" y="140" transform="scale(.1)" fill="#fff" textLength="670">CPU | CUDA</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
264
_static/doctools.js
Normal file
@ -0,0 +1,264 @@
|
||||
/*
|
||||
* doctools.js
|
||||
* ~~~~~~~~~~~
|
||||
*
|
||||
* Base JavaScript utilities for all Sphinx HTML documentation.
|
||||
*
|
||||
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
"use strict";
|
||||
|
||||
const _ready = (callback) => {
|
||||
if (document.readyState !== "loading") {
|
||||
callback();
|
||||
} else {
|
||||
document.addEventListener("DOMContentLoaded", callback);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* highlight a given string on a node by wrapping it in
|
||||
* span elements with the given class name.
|
||||
*/
|
||||
const _highlight = (node, addItems, text, className) => {
|
||||
if (node.nodeType === Node.TEXT_NODE) {
|
||||
const val = node.nodeValue;
|
||||
const parent = node.parentNode;
|
||||
const pos = val.toLowerCase().indexOf(text);
|
||||
if (
|
||||
pos >= 0 &&
|
||||
!parent.classList.contains(className) &&
|
||||
!parent.classList.contains("nohighlight")
|
||||
) {
|
||||
let span;
|
||||
|
||||
const closestNode = parent.closest("body, svg, foreignObject");
|
||||
const isInSVG = closestNode && closestNode.matches("svg");
|
||||
if (isInSVG) {
|
||||
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
|
||||
} else {
|
||||
span = document.createElement("span");
|
||||
span.classList.add(className);
|
||||
}
|
||||
|
||||
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
|
||||
parent.insertBefore(
|
||||
span,
|
||||
parent.insertBefore(
|
||||
document.createTextNode(val.substr(pos + text.length)),
|
||||
node.nextSibling
|
||||
)
|
||||
);
|
||||
node.nodeValue = val.substr(0, pos);
|
||||
|
||||
if (isInSVG) {
|
||||
const rect = document.createElementNS(
|
||||
"http://www.w3.org/2000/svg",
|
||||
"rect"
|
||||
);
|
||||
const bbox = parent.getBBox();
|
||||
rect.x.baseVal.value = bbox.x;
|
||||
rect.y.baseVal.value = bbox.y;
|
||||
rect.width.baseVal.value = bbox.width;
|
||||
rect.height.baseVal.value = bbox.height;
|
||||
rect.setAttribute("class", className);
|
||||
addItems.push({ parent: parent, target: rect });
|
||||
}
|
||||
}
|
||||
} else if (node.matches && !node.matches("button, select, textarea")) {
|
||||
node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
|
||||
}
|
||||
};
|
||||
const _highlightText = (thisNode, text, className) => {
|
||||
let addItems = [];
|
||||
_highlight(thisNode, addItems, text, className);
|
||||
addItems.forEach((obj) =>
|
||||
obj.parent.insertAdjacentElement("beforebegin", obj.target)
|
||||
);
|
||||
};
|
||||
|
||||
/**
|
||||
* Small JavaScript module for the documentation.
|
||||
*/
|
||||
const Documentation = {
|
||||
init: () => {
|
||||
Documentation.highlightSearchWords();
|
||||
Documentation.initDomainIndexTable();
|
||||
Documentation.initOnKeyListeners();
|
||||
},
|
||||
|
||||
/**
|
||||
* i18n support
|
||||
*/
|
||||
TRANSLATIONS: {},
|
||||
PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
|
||||
LOCALE: "unknown",
|
||||
|
||||
// gettext and ngettext don't access this so that the functions
|
||||
// can safely bound to a different name (_ = Documentation.gettext)
|
||||
gettext: (string) => {
|
||||
const translated = Documentation.TRANSLATIONS[string];
|
||||
switch (typeof translated) {
|
||||
case "undefined":
|
||||
return string; // no translation
|
||||
case "string":
|
||||
return translated; // translation exists
|
||||
default:
|
||||
return translated[0]; // (singular, plural) translation tuple exists
|
||||
}
|
||||
},
|
||||
|
||||
ngettext: (singular, plural, n) => {
|
||||
const translated = Documentation.TRANSLATIONS[singular];
|
||||
if (typeof translated !== "undefined")
|
||||
return translated[Documentation.PLURAL_EXPR(n)];
|
||||
return n === 1 ? singular : plural;
|
||||
},
|
||||
|
||||
addTranslations: (catalog) => {
|
||||
Object.assign(Documentation.TRANSLATIONS, catalog.messages);
|
||||
Documentation.PLURAL_EXPR = new Function(
|
||||
"n",
|
||||
`return (${catalog.plural_expr})`
|
||||
);
|
||||
Documentation.LOCALE = catalog.locale;
|
||||
},
|
||||
|
||||
/**
|
||||
* highlight the search words provided in the url in the text
|
||||
*/
|
||||
highlightSearchWords: () => {
|
||||
const highlight =
|
||||
new URLSearchParams(window.location.search).get("highlight") || "";
|
||||
const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
|
||||
if (terms.length === 0) return; // nothing to do
|
||||
|
||||
// There should never be more than one element matching "div.body"
|
||||
const divBody = document.querySelectorAll("div.body");
|
||||
const body = divBody.length ? divBody[0] : document.querySelector("body");
|
||||
window.setTimeout(() => {
|
||||
terms.forEach((term) => _highlightText(body, term, "highlighted"));
|
||||
}, 10);
|
||||
|
||||
const searchBox = document.getElementById("searchbox");
|
||||
if (searchBox === null) return;
|
||||
searchBox.appendChild(
|
||||
document
|
||||
.createRange()
|
||||
.createContextualFragment(
|
||||
'<p class="highlight-link">' +
|
||||
'<a href="javascript:Documentation.hideSearchWords()">' +
|
||||
Documentation.gettext("Hide Search Matches") +
|
||||
"</a></p>"
|
||||
)
|
||||
);
|
||||
},
|
||||
|
||||
/**
|
||||
* helper function to hide the search marks again
|
||||
*/
|
||||
hideSearchWords: () => {
|
||||
document
|
||||
.querySelectorAll("#searchbox .highlight-link")
|
||||
.forEach((el) => el.remove());
|
||||
document
|
||||
.querySelectorAll("span.highlighted")
|
||||
.forEach((el) => el.classList.remove("highlighted"));
|
||||
const url = new URL(window.location);
|
||||
url.searchParams.delete("highlight");
|
||||
window.history.replaceState({}, "", url);
|
||||
},
|
||||
|
||||
/**
|
||||
* helper function to focus on search bar
|
||||
*/
|
||||
focusSearchBar: () => {
|
||||
document.querySelectorAll("input[name=q]")[0]?.focus();
|
||||
},
|
||||
|
||||
/**
|
||||
* Initialise the domain index toggle buttons
|
||||
*/
|
||||
initDomainIndexTable: () => {
|
||||
const toggler = (el) => {
|
||||
const idNumber = el.id.substr(7);
|
||||
const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
|
||||
if (el.src.substr(-9) === "minus.png") {
|
||||
el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`;
|
||||
toggledRows.forEach((el) => (el.style.display = "none"));
|
||||
} else {
|
||||
el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`;
|
||||
toggledRows.forEach((el) => (el.style.display = ""));
|
||||
}
|
||||
};
|
||||
|
||||
const togglerElements = document.querySelectorAll("img.toggler");
|
||||
togglerElements.forEach((el) =>
|
||||
el.addEventListener("click", (event) => toggler(event.currentTarget))
|
||||
);
|
||||
togglerElements.forEach((el) => (el.style.display = ""));
|
||||
if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler);
|
||||
},
|
||||
|
||||
initOnKeyListeners: () => {
|
||||
// only install a listener if it is really needed
|
||||
if (
|
||||
!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
|
||||
!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
|
||||
)
|
||||
return;
|
||||
|
||||
const blacklistedElements = new Set([
|
||||
"TEXTAREA",
|
||||
"INPUT",
|
||||
"SELECT",
|
||||
"BUTTON",
|
||||
]);
|
||||
document.addEventListener("keydown", (event) => {
|
||||
if (blacklistedElements.has(document.activeElement.tagName)) return; // bail for input elements
|
||||
if (event.altKey || event.ctrlKey || event.metaKey) return; // bail with special keys
|
||||
|
||||
if (!event.shiftKey) {
|
||||
switch (event.key) {
|
||||
case "ArrowLeft":
|
||||
if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
|
||||
|
||||
const prevLink = document.querySelector('link[rel="prev"]');
|
||||
if (prevLink && prevLink.href) {
|
||||
window.location.href = prevLink.href;
|
||||
event.preventDefault();
|
||||
}
|
||||
break;
|
||||
case "ArrowRight":
|
||||
if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
|
||||
|
||||
const nextLink = document.querySelector('link[rel="next"]');
|
||||
if (nextLink && nextLink.href) {
|
||||
window.location.href = nextLink.href;
|
||||
event.preventDefault();
|
||||
}
|
||||
break;
|
||||
case "Escape":
|
||||
if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
|
||||
Documentation.hideSearchWords();
|
||||
event.preventDefault();
|
||||
}
|
||||
}
|
||||
|
||||
// some keyboard layouts may need Shift to get /
|
||||
switch (event.key) {
|
||||
case "/":
|
||||
if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
|
||||
Documentation.focusSearchBar();
|
||||
event.preventDefault();
|
||||
}
|
||||
});
|
||||
},
|
||||
};
|
||||
|
||||
// quick alias for translations
|
||||
const _ = Documentation.gettext;
|
||||
|
||||
_ready(Documentation.init);
|
14
_static/documentation_options.js
Normal file
@ -0,0 +1,14 @@
|
||||
var DOCUMENTATION_OPTIONS = {
|
||||
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
|
||||
VERSION: '0.1',
|
||||
LANGUAGE: 'en',
|
||||
COLLAPSE_INDEX: false,
|
||||
BUILDER: 'html',
|
||||
FILE_SUFFIX: '.html',
|
||||
LINK_SUFFIX: '.html',
|
||||
HAS_SOURCE: true,
|
||||
SOURCELINK_SUFFIX: '.txt',
|
||||
NAVIGATION_WITH_KEYS: false,
|
||||
SHOW_SEARCH_SUMMARY: true,
|
||||
ENABLE_SEARCH_SHORTCUTS: true,
|
||||
};
|
BIN
_static/file.png
Normal file
After Width: | Height: | Size: 286 B |
10881
_static/jquery-3.6.0.js
vendored
Normal file
2
_static/jquery.js
vendored
Normal file
1
_static/js/badge_only.js
Normal file
@ -0,0 +1 @@
|
||||
!function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});
|
4
_static/js/html5shiv-printshiv.min.js
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
/**
|
||||
* @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
|
||||
*/
|
||||
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var 
b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof 
b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);
|
4
_static/js/html5shiv.min.js
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
/**
|
||||
* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
|
||||
*/
|
||||
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var 
a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);
|
1
_static/js/theme.js
Normal file
1
_static/k2-gt-v1.9-blueviolet.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: >= v1.9"><title>k2: >= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= v1.9</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
199
_static/language_data.js
Normal file
@ -0,0 +1,199 @@
|
||||
/*
|
||||
* language_data.js
|
||||
* ~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* This script contains the language-specific data used by searchtools.js,
|
||||
* namely the list of stopwords, stemmer, scorer and splitter.
|
||||
*
|
||||
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
|
||||
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
|
||||
|
||||
|
||||
/* Non-minified version is copied as a separate JS file, is available */
|
||||
|
||||
/**
|
||||
* Porter Stemmer
|
||||
*/
|
||||
var Stemmer = function() {
|
||||
|
||||
var step2list = {
|
||||
ational: 'ate',
|
||||
tional: 'tion',
|
||||
enci: 'ence',
|
||||
anci: 'ance',
|
||||
izer: 'ize',
|
||||
bli: 'ble',
|
||||
alli: 'al',
|
||||
entli: 'ent',
|
||||
eli: 'e',
|
||||
ousli: 'ous',
|
||||
ization: 'ize',
|
||||
ation: 'ate',
|
||||
ator: 'ate',
|
||||
alism: 'al',
|
||||
iveness: 'ive',
|
||||
fulness: 'ful',
|
||||
ousness: 'ous',
|
||||
aliti: 'al',
|
||||
iviti: 'ive',
|
||||
biliti: 'ble',
|
||||
logi: 'log'
|
||||
};
|
||||
|
||||
var step3list = {
|
||||
icate: 'ic',
|
||||
ative: '',
|
||||
alize: 'al',
|
||||
iciti: 'ic',
|
||||
ical: 'ic',
|
||||
ful: '',
|
||||
ness: ''
|
||||
};
|
||||
|
||||
var c = "[^aeiou]"; // consonant
|
||||
var v = "[aeiouy]"; // vowel
|
||||
var C = c + "[^aeiouy]*"; // consonant sequence
|
||||
var V = v + "[aeiou]*"; // vowel sequence
|
||||
|
||||
var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
|
||||
var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
|
||||
var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
|
||||
var s_v = "^(" + C + ")?" + v; // vowel in stem
|
||||
|
||||
this.stemWord = function (w) {
|
||||
var stem;
|
||||
var suffix;
|
||||
var firstch;
|
||||
var origword = w;
|
||||
|
||||
if (w.length < 3)
|
||||
return w;
|
||||
|
||||
var re;
|
||||
var re2;
|
||||
var re3;
|
||||
var re4;
|
||||
|
||||
firstch = w.substr(0,1);
|
||||
if (firstch == "y")
|
||||
w = firstch.toUpperCase() + w.substr(1);
|
||||
|
||||
// Step 1a
|
||||
re = /^(.+?)(ss|i)es$/;
|
||||
re2 = /^(.+?)([^s])s$/;
|
||||
|
||||
if (re.test(w))
|
||||
w = w.replace(re,"$1$2");
|
||||
else if (re2.test(w))
|
||||
w = w.replace(re2,"$1$2");
|
||||
|
||||
// Step 1b
|
||||
re = /^(.+?)eed$/;
|
||||
re2 = /^(.+?)(ed|ing)$/;
|
||||
if (re.test(w)) {
|
||||
var fp = re.exec(w);
|
||||
re = new RegExp(mgr0);
|
||||
if (re.test(fp[1])) {
|
||||
re = /.$/;
|
||||
w = w.replace(re,"");
|
||||
}
|
||||
}
|
||||
else if (re2.test(w)) {
|
||||
var fp = re2.exec(w);
|
||||
stem = fp[1];
|
||||
re2 = new RegExp(s_v);
|
||||
if (re2.test(stem)) {
|
||||
w = stem;
|
||||
re2 = /(at|bl|iz)$/;
|
||||
re3 = new RegExp("([^aeiouylsz])\\1$");
|
||||
re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
|
||||
if (re2.test(w))
|
||||
w = w + "e";
|
||||
else if (re3.test(w)) {
|
||||
re = /.$/;
|
||||
w = w.replace(re,"");
|
||||
}
|
||||
else if (re4.test(w))
|
||||
w = w + "e";
|
||||
}
|
||||
}
|
||||
|
||||
// Step 1c
|
||||
re = /^(.+?)y$/;
|
||||
if (re.test(w)) {
|
||||
var fp = re.exec(w);
|
||||
stem = fp[1];
|
||||
re = new RegExp(s_v);
|
||||
if (re.test(stem))
|
||||
w = stem + "i";
|
||||
}
|
||||
|
||||
// Step 2
|
||||
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
|
||||
if (re.test(w)) {
|
||||
var fp = re.exec(w);
|
||||
stem = fp[1];
|
||||
suffix = fp[2];
|
||||
re = new RegExp(mgr0);
|
||||
if (re.test(stem))
|
||||
w = stem + step2list[suffix];
|
||||
}
|
||||
|
||||
// Step 3
|
||||
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
|
||||
if (re.test(w)) {
|
||||
var fp = re.exec(w);
|
||||
stem = fp[1];
|
||||
suffix = fp[2];
|
||||
re = new RegExp(mgr0);
|
||||
if (re.test(stem))
|
||||
w = stem + step3list[suffix];
|
||||
}
|
||||
|
||||
// Step 4
|
||||
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
|
||||
re2 = /^(.+?)(s|t)(ion)$/;
|
||||
if (re.test(w)) {
|
||||
var fp = re.exec(w);
|
||||
stem = fp[1];
|
||||
re = new RegExp(mgr1);
|
||||
if (re.test(stem))
|
||||
w = stem;
|
||||
}
|
||||
else if (re2.test(w)) {
|
||||
var fp = re2.exec(w);
|
||||
stem = fp[1] + fp[2];
|
||||
re2 = new RegExp(mgr1);
|
||||
if (re2.test(stem))
|
||||
w = stem;
|
||||
}
|
||||
|
||||
// Step 5
|
||||
re = /^(.+?)e$/;
|
||||
if (re.test(w)) {
|
||||
var fp = re.exec(w);
|
||||
stem = fp[1];
|
||||
re = new RegExp(mgr1);
|
||||
re2 = new RegExp(meq1);
|
||||
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
|
||||
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
|
||||
w = stem;
|
||||
}
|
||||
re = /ll$/;
|
||||
re2 = new RegExp(mgr1);
|
||||
if (re.test(w) && re2.test(w)) {
|
||||
re = /.$/;
|
||||
w = w.replace(re,"");
|
||||
}
|
||||
|
||||
// and turn initial Y back to y
|
||||
if (firstch == "y")
|
||||
w = firstch.toLowerCase() + w.substr(1);
|
||||
return w;
|
||||
}
|
||||
}
|
||||
|
BIN
_static/logo.png
Normal file
After Width: | Height: | Size: 666 KiB |
BIN
_static/minus.png
Normal file
After Width: | Height: | Size: 90 B |
1
_static/os-Linux_macOS-ff69b4.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="114" height="20" role="img" aria-label="os: Linux | macOS"><title>os: Linux | macOS</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="114" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="91" height="20" fill="#ff69b4"/><rect width="114" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">os</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">os</text><text aria-hidden="true" x="675" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="810">Linux | macOS</text><text x="675" y="140" transform="scale(.1)" fill="#fff" textLength="810">Linux | macOS</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
BIN
_static/plus.png
Normal file
After Width: | Height: | Size: 90 B |
74
_static/pygments.css
Normal file
@ -0,0 +1,74 @@
|
||||
pre { line-height: 125%; }
|
||||
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
|
||||
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
|
||||
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
|
||||
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
|
||||
.highlight .hll { background-color: #ffffcc }
|
||||
.highlight { background: #eeffcc; }
|
||||
.highlight .c { color: #408090; font-style: italic } /* Comment */
|
||||
.highlight .err { border: 1px solid #FF0000 } /* Error */
|
||||
.highlight .k { color: #007020; font-weight: bold } /* Keyword */
|
||||
.highlight .o { color: #666666 } /* Operator */
|
||||
.highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
|
||||
.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
|
||||
.highlight .cp { color: #007020 } /* Comment.Preproc */
|
||||
.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
|
||||
.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
|
||||
.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
|
||||
.highlight .gd { color: #A00000 } /* Generic.Deleted */
|
||||
.highlight .ge { font-style: italic } /* Generic.Emph */
|
||||
.highlight .gr { color: #FF0000 } /* Generic.Error */
|
||||
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
|
||||
.highlight .gi { color: #00A000 } /* Generic.Inserted */
|
||||
.highlight .go { color: #333333 } /* Generic.Output */
|
||||
.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
|
||||
.highlight .gs { font-weight: bold } /* Generic.Strong */
|
||||
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
|
||||
.highlight .gt { color: #0044DD } /* Generic.Traceback */
|
||||
.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
|
||||
.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
|
||||
.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
|
||||
.highlight .kp { color: #007020 } /* Keyword.Pseudo */
|
||||
.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
|
||||
.highlight .kt { color: #902000 } /* Keyword.Type */
|
||||
.highlight .m { color: #208050 } /* Literal.Number */
|
||||
.highlight .s { color: #4070a0 } /* Literal.String */
|
||||
.highlight .na { color: #4070a0 } /* Name.Attribute */
|
||||
.highlight .nb { color: #007020 } /* Name.Builtin */
|
||||
.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
|
||||
.highlight .no { color: #60add5 } /* Name.Constant */
|
||||
.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
|
||||
.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
|
||||
.highlight .ne { color: #007020 } /* Name.Exception */
|
||||
.highlight .nf { color: #06287e } /* Name.Function */
|
||||
.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
|
||||
.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
|
||||
.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
|
||||
.highlight .nv { color: #bb60d5 } /* Name.Variable */
|
||||
.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
|
||||
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
|
||||
.highlight .mb { color: #208050 } /* Literal.Number.Bin */
|
||||
.highlight .mf { color: #208050 } /* Literal.Number.Float */
|
||||
.highlight .mh { color: #208050 } /* Literal.Number.Hex */
|
||||
.highlight .mi { color: #208050 } /* Literal.Number.Integer */
|
||||
.highlight .mo { color: #208050 } /* Literal.Number.Oct */
|
||||
.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
|
||||
.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
|
||||
.highlight .sc { color: #4070a0 } /* Literal.String.Char */
|
||||
.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
|
||||
.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
|
||||
.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
|
||||
.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
|
||||
.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
|
||||
.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
|
||||
.highlight .sx { color: #c65d09 } /* Literal.String.Other */
|
||||
.highlight .sr { color: #235388 } /* Literal.String.Regex */
|
||||
.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
|
||||
.highlight .ss { color: #517918 } /* Literal.String.Symbol */
|
||||
.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
|
||||
.highlight .fm { color: #06287e } /* Name.Function.Magic */
|
||||
.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
|
||||
.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
|
||||
.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
|
||||
.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
|
||||
.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
|
1
_static/python-gt-v3.6-blue.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: >= 3.6"><title>python: >= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">>= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">>= 3.6</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
530
_static/searchtools.js
Normal file
@ -0,0 +1,530 @@
|
||||
/*
|
||||
* searchtools.js
|
||||
* ~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* Sphinx JavaScript utilities for the full-text search.
|
||||
*
|
||||
* :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS.
|
||||
* :license: BSD, see LICENSE for details.
|
||||
*
|
||||
*/
|
||||
"use strict";
|
||||
|
||||
/**
|
||||
* Simple result scoring code.
|
||||
*/
|
||||
if (typeof Scorer === "undefined") {
|
||||
var Scorer = {
|
||||
// Implement the following function to further tweak the score for each result
|
||||
// The function takes a result array [docname, title, anchor, descr, score, filename]
|
||||
// and returns the new score.
|
||||
/*
|
||||
score: result => {
|
||||
const [docname, title, anchor, descr, score, filename] = result
|
||||
return score
|
||||
},
|
||||
*/
|
||||
|
||||
// query matches the full name of an object
|
||||
objNameMatch: 11,
|
||||
// or matches in the last dotted part of the object name
|
||||
objPartialMatch: 6,
|
||||
// Additive scores depending on the priority of the object
|
||||
objPrio: {
|
||||
0: 15, // used to be importantResults
|
||||
1: 5, // used to be objectResults
|
||||
2: -5, // used to be unimportantResults
|
||||
},
|
||||
// Used when the priority is not in the mapping.
|
||||
objPrioDefault: 0,
|
||||
|
||||
// query found in title
|
||||
title: 15,
|
||||
partialTitle: 7,
|
||||
// query found in terms
|
||||
term: 5,
|
||||
partialTerm: 2,
|
||||
};
|
||||
}
|
||||
|
||||
const _removeChildren = (element) => {
|
||||
while (element && element.lastChild) element.removeChild(element.lastChild);
|
||||
};
|
||||
|
||||
/**
|
||||
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
|
||||
*/
|
||||
const _escapeRegExp = (string) =>
|
||||
string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
|
||||
|
||||
const _displayItem = (item, highlightTerms, searchTerms) => {
|
||||
const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
|
||||
const docUrlRoot = DOCUMENTATION_OPTIONS.URL_ROOT;
|
||||
const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
|
||||
const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
|
||||
const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY;
|
||||
|
||||
const [docName, title, anchor, descr] = item;
|
||||
|
||||
let listItem = document.createElement("li");
|
||||
let requestUrl;
|
||||
let linkUrl;
|
||||
if (docBuilder === "dirhtml") {
|
||||
// dirhtml builder
|
||||
let dirname = docName + "/";
|
||||
if (dirname.match(/\/index\/$/))
|
||||
dirname = dirname.substring(0, dirname.length - 6);
|
||||
else if (dirname === "index/") dirname = "";
|
||||
requestUrl = docUrlRoot + dirname;
|
||||
linkUrl = requestUrl;
|
||||
} else {
|
||||
// normal html builders
|
||||
requestUrl = docUrlRoot + docName + docFileSuffix;
|
||||
linkUrl = docName + docLinkSuffix;
|
||||
}
|
||||
const params = new URLSearchParams();
|
||||
params.set("highlight", [...highlightTerms].join(" "));
|
||||
let linkEl = listItem.appendChild(document.createElement("a"));
|
||||
linkEl.href = linkUrl + "?" + params.toString() + anchor;
|
||||
linkEl.innerHTML = title;
|
||||
if (descr)
|
||||
listItem.appendChild(document.createElement("span")).innerHTML =
|
||||
" (" + descr + ")";
|
||||
else if (showSearchSummary)
|
||||
fetch(requestUrl)
|
||||
.then((responseData) => responseData.text())
|
||||
.then((data) => {
|
||||
if (data)
|
||||
listItem.appendChild(
|
||||
Search.makeSearchSummary(data, searchTerms, highlightTerms)
|
||||
);
|
||||
});
|
||||
Search.output.appendChild(listItem);
|
||||
};
|
||||
const _finishSearch = (resultCount) => {
|
||||
Search.stopPulse();
|
||||
Search.title.innerText = _("Search Results");
|
||||
if (!resultCount)
|
||||
Search.status.innerText = Documentation.gettext(
|
||||
"Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories."
|
||||
);
|
||||
else
|
||||
Search.status.innerText = _(
|
||||
`Search finished, found ${resultCount} page(s) matching the search query.`
|
||||
);
|
||||
};
|
||||
const _displayNextItem = (
|
||||
results,
|
||||
resultCount,
|
||||
highlightTerms,
|
||||
searchTerms
|
||||
) => {
|
||||
// results left, load the summary and display it
|
||||
// this is intended to be dynamic (don't sub resultsCount)
|
||||
if (results.length) {
|
||||
_displayItem(results.pop(), highlightTerms, searchTerms);
|
||||
setTimeout(
|
||||
() => _displayNextItem(results, resultCount, highlightTerms, searchTerms),
|
||||
5
|
||||
);
|
||||
}
|
||||
// search finished, update title and status message
|
||||
else _finishSearch(resultCount);
|
||||
};
|
||||
|
||||
/**
|
||||
* Default splitQuery function. Can be overridden in ``sphinx.search`` with a
|
||||
* custom function per language.
|
||||
*
|
||||
* The regular expression works by splitting the string on consecutive characters
|
||||
* that are not Unicode letters, numbers, underscores, or emoji characters.
|
||||
* This is the same as ``\W+`` in Python, preserving the surrogate pair area.
|
||||
*/
|
||||
if (typeof splitQuery === "undefined") {
|
||||
var splitQuery = (query) => query
|
||||
.split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
|
||||
.filter(term => term) // remove remaining empty strings
|
||||
}
|
||||
|
||||
/**
|
||||
* Search Module
|
||||
*/
|
||||
const Search = {
|
||||
_index: null,
|
||||
_queued_query: null,
|
||||
_pulse_status: -1,
|
||||
|
||||
htmlToText: (htmlString) => {
|
||||
const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
|
||||
htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() });
|
||||
const docContent = htmlElement.querySelector('[role="main"]');
|
||||
if (docContent !== undefined) return docContent.textContent;
|
||||
console.warn(
|
||||
"Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template."
|
||||
);
|
||||
return "";
|
||||
},
|
||||
|
||||
init: () => {
|
||||
const query = new URLSearchParams(window.location.search).get("q");
|
||||
document
|
||||
.querySelectorAll('input[name="q"]')
|
||||
.forEach((el) => (el.value = query));
|
||||
if (query) Search.performSearch(query);
|
||||
},
|
||||
|
||||
loadIndex: (url) =>
|
||||
(document.body.appendChild(document.createElement("script")).src = url),
|
||||
|
||||
setIndex: (index) => {
|
||||
Search._index = index;
|
||||
if (Search._queued_query !== null) {
|
||||
const query = Search._queued_query;
|
||||
Search._queued_query = null;
|
||||
Search.query(query);
|
||||
}
|
||||
},
|
||||
|
||||
hasIndex: () => Search._index !== null,
|
||||
|
||||
deferQuery: (query) => (Search._queued_query = query),
|
||||
|
||||
stopPulse: () => (Search._pulse_status = -1),
|
||||
|
||||
startPulse: () => {
|
||||
if (Search._pulse_status >= 0) return;
|
||||
|
||||
const pulse = () => {
|
||||
Search._pulse_status = (Search._pulse_status + 1) % 4;
|
||||
Search.dots.innerText = ".".repeat(Search._pulse_status);
|
||||
if (Search._pulse_status >= 0) window.setTimeout(pulse, 500);
|
||||
};
|
||||
pulse();
|
||||
},
|
||||
|
||||
/**
|
||||
* perform a search for something (or wait until index is loaded)
|
||||
*/
|
||||
performSearch: (query) => {
|
||||
// create the required interface elements
|
||||
const searchText = document.createElement("h2");
|
||||
searchText.textContent = _("Searching");
|
||||
const searchSummary = document.createElement("p");
|
||||
searchSummary.classList.add("search-summary");
|
||||
searchSummary.innerText = "";
|
||||
const searchList = document.createElement("ul");
|
||||
searchList.classList.add("search");
|
||||
|
||||
const out = document.getElementById("search-results");
|
||||
Search.title = out.appendChild(searchText);
|
||||
Search.dots = Search.title.appendChild(document.createElement("span"));
|
||||
Search.status = out.appendChild(searchSummary);
|
||||
Search.output = out.appendChild(searchList);
|
||||
|
||||
const searchProgress = document.getElementById("search-progress");
|
||||
// Some themes don't use the search progress node
|
||||
if (searchProgress) {
|
||||
searchProgress.innerText = _("Preparing search...");
|
||||
}
|
||||
Search.startPulse();
|
||||
|
||||
// index already loaded, the browser was quick!
|
||||
if (Search.hasIndex()) Search.query(query);
|
||||
else Search.deferQuery(query);
|
||||
},
|
||||
|
||||
/**
|
||||
* execute search (requires search index to be loaded)
|
||||
*/
|
||||
query: (query) => {
|
||||
// stem the search terms and add them to the correct list
|
||||
const stemmer = new Stemmer();
|
||||
const searchTerms = new Set();
|
||||
const excludedTerms = new Set();
|
||||
const highlightTerms = new Set();
|
||||
const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));
|
||||
splitQuery(query.trim()).forEach((queryTerm) => {
|
||||
const queryTermLower = queryTerm.toLowerCase();
|
||||
|
||||
// maybe skip this "word"
|
||||
// stopwords array is from language_data.js
|
||||
if (
|
||||
stopwords.indexOf(queryTermLower) !== -1 ||
|
||||
queryTerm.match(/^\d+$/)
|
||||
)
|
||||
return;
|
||||
|
||||
// stem the word
|
||||
let word = stemmer.stemWord(queryTermLower);
|
||||
// select the correct list
|
||||
if (word[0] === "-") excludedTerms.add(word.substr(1));
|
||||
else {
|
||||
searchTerms.add(word);
|
||||
highlightTerms.add(queryTermLower);
|
||||
}
|
||||
});
|
||||
|
||||
// console.debug("SEARCH: searching for:");
|
||||
// console.info("required: ", [...searchTerms]);
|
||||
// console.info("excluded: ", [...excludedTerms]);
|
||||
|
||||
// array of [docname, title, anchor, descr, score, filename]
|
||||
let results = [];
|
||||
_removeChildren(document.getElementById("search-progress"));
|
||||
|
||||
// lookup as object
|
||||
objectTerms.forEach((term) =>
|
||||
results.push(...Search.performObjectSearch(term, objectTerms))
|
||||
);
|
||||
|
||||
// lookup as search terms in fulltext
|
||||
results.push(...Search.performTermsSearch(searchTerms, excludedTerms));
|
||||
|
||||
// let the scorer override scores with a custom scoring function
|
||||
if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item)));
|
||||
|
||||
// now sort the results by score (in opposite order of appearance, since the
|
||||
// display function below uses pop() to retrieve items) and then
|
||||
// alphabetically
|
||||
results.sort((a, b) => {
|
||||
const leftScore = a[4];
|
||||
const rightScore = b[4];
|
||||
if (leftScore === rightScore) {
|
||||
// same score: sort alphabetically
|
||||
const leftTitle = a[1].toLowerCase();
|
||||
const rightTitle = b[1].toLowerCase();
|
||||
if (leftTitle === rightTitle) return 0;
|
||||
return leftTitle > rightTitle ? -1 : 1; // inverted is intentional
|
||||
}
|
||||
return leftScore > rightScore ? 1 : -1;
|
||||
});
|
||||
|
||||
// remove duplicate search results
|
||||
// note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept
|
||||
let seen = new Set();
|
||||
results = results.reverse().reduce((acc, result) => {
|
||||
let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(',');
|
||||
if (!seen.has(resultStr)) {
|
||||
acc.push(result);
|
||||
seen.add(resultStr);
|
||||
}
|
||||
return acc;
|
||||
}, []);
|
||||
|
||||
results = results.reverse();
|
||||
|
||||
// for debugging
|
||||
//Search.lastresults = results.slice(); // a copy
|
||||
// console.info("search results:", Search.lastresults);
|
||||
|
||||
// print the results
|
||||
_displayNextItem(results, results.length, highlightTerms, searchTerms);
|
||||
},
|
||||
|
||||
/**
|
||||
* search for object names
|
||||
*/
|
||||
performObjectSearch: (object, objectTerms) => {
|
||||
const filenames = Search._index.filenames;
|
||||
const docNames = Search._index.docnames;
|
||||
const objects = Search._index.objects;
|
||||
const objNames = Search._index.objnames;
|
||||
const titles = Search._index.titles;
|
||||
|
||||
const results = [];
|
||||
|
||||
const objectSearchCallback = (prefix, match) => {
|
||||
const name = match[4]
|
||||
const fullname = (prefix ? prefix + "." : "") + name;
|
||||
const fullnameLower = fullname.toLowerCase();
|
||||
if (fullnameLower.indexOf(object) < 0) return;
|
||||
|
||||
let score = 0;
|
||||
const parts = fullnameLower.split(".");
|
||||
|
||||
// check for different match types: exact matches of full name or
|
||||
// "last name" (i.e. last dotted part)
|
||||
if (fullnameLower === object || parts.slice(-1)[0] === object)
|
||||
score += Scorer.objNameMatch;
|
||||
else if (parts.slice(-1)[0].indexOf(object) > -1)
|
||||
score += Scorer.objPartialMatch; // matches in last name
|
||||
|
||||
const objName = objNames[match[1]][2];
|
||||
const title = titles[match[0]];
|
||||
|
||||
// If more than one term searched for, we require other words to be
|
||||
// found in the name/title/description
|
||||
const otherTerms = new Set(objectTerms);
|
||||
otherTerms.delete(object);
|
||||
if (otherTerms.size > 0) {
|
||||
const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase();
|
||||
if (
|
||||
[...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0)
|
||||
)
|
||||
return;
|
||||
}
|
||||
|
||||
let anchor = match[3];
|
||||
if (anchor === "") anchor = fullname;
|
||||
else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname;
|
||||
|
||||
const descr = objName + _(", in ") + title;
|
||||
|
||||
// add custom score for some objects according to scorer
|
||||
if (Scorer.objPrio.hasOwnProperty(match[2]))
|
||||
score += Scorer.objPrio[match[2]];
|
||||
else score += Scorer.objPrioDefault;
|
||||
|
||||
results.push([
|
||||
docNames[match[0]],
|
||||
fullname,
|
||||
"#" + anchor,
|
||||
descr,
|
||||
score,
|
||||
filenames[match[0]],
|
||||
]);
|
||||
};
|
||||
Object.keys(objects).forEach((prefix) =>
|
||||
objects[prefix].forEach((array) =>
|
||||
objectSearchCallback(prefix, array)
|
||||
)
|
||||
);
|
||||
return results;
|
||||
},
|
||||
|
||||
/**
|
||||
* search for full-text terms in the index
|
||||
*/
|
||||
performTermsSearch: (searchTerms, excludedTerms) => {
|
||||
// prepare search
|
||||
const terms = Search._index.terms;
|
||||
const titleTerms = Search._index.titleterms;
|
||||
const docNames = Search._index.docnames;
|
||||
const filenames = Search._index.filenames;
|
||||
const titles = Search._index.titles;
|
||||
|
||||
const scoreMap = new Map();
|
||||
const fileMap = new Map();
|
||||
|
||||
// perform the search on the required terms
|
||||
searchTerms.forEach((word) => {
|
||||
const files = [];
|
||||
const arr = [
|
||||
{ files: terms[word], score: Scorer.term },
|
||||
{ files: titleTerms[word], score: Scorer.title },
|
||||
];
|
||||
// add support for partial matches
|
||||
if (word.length > 2) {
|
||||
const escapedWord = _escapeRegExp(word);
|
||||
Object.keys(terms).forEach((term) => {
|
||||
if (term.match(escapedWord) && !terms[word])
|
||||
arr.push({ files: terms[term], score: Scorer.partialTerm });
|
||||
});
|
||||
Object.keys(titleTerms).forEach((term) => {
|
||||
if (term.match(escapedWord) && !titleTerms[word])
|
||||
arr.push({ files: titleTerms[word], score: Scorer.partialTitle });
|
||||
});
|
||||
}
|
||||
|
||||
// no match but word was a required one
|
||||
if (arr.every((record) => record.files === undefined)) return;
|
||||
|
||||
// found search word in contents
|
||||
arr.forEach((record) => {
|
||||
if (record.files === undefined) return;
|
||||
|
||||
let recordFiles = record.files;
|
||||
if (recordFiles.length === undefined) recordFiles = [recordFiles];
|
||||
files.push(...recordFiles);
|
||||
|
||||
// set score for the word in each file
|
||||
recordFiles.forEach((file) => {
|
||||
if (!scoreMap.has(file)) scoreMap.set(file, {});
|
||||
scoreMap.get(file)[word] = record.score;
|
||||
});
|
||||
});
|
||||
|
||||
// create the mapping
|
||||
files.forEach((file) => {
|
||||
if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1)
|
||||
fileMap.get(file).push(word);
|
||||
else fileMap.set(file, [word]);
|
||||
});
|
||||
});
|
||||
|
||||
// now check if the files don't contain excluded terms
|
||||
const results = [];
|
||||
for (const [file, wordList] of fileMap) {
|
||||
// check if all requirements are matched
|
||||
|
||||
// as search terms with length < 3 are discarded
|
||||
const filteredTermCount = [...searchTerms].filter(
|
||||
(term) => term.length > 2
|
||||
).length;
|
||||
if (
|
||||
wordList.length !== searchTerms.size &&
|
||||
wordList.length !== filteredTermCount
|
||||
)
|
||||
continue;
|
||||
|
||||
// ensure that none of the excluded terms is in the search result
|
||||
if (
|
||||
[...excludedTerms].some(
|
||||
(term) =>
|
||||
terms[term] === file ||
|
||||
titleTerms[term] === file ||
|
||||
(terms[term] || []).includes(file) ||
|
||||
(titleTerms[term] || []).includes(file)
|
||||
)
|
||||
)
|
||||
break;
|
||||
|
||||
// select one (max) score for the file.
|
||||
const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w]));
|
||||
// add result to the result list
|
||||
results.push([
|
||||
docNames[file],
|
||||
titles[file],
|
||||
"",
|
||||
null,
|
||||
score,
|
||||
filenames[file],
|
||||
]);
|
||||
}
|
||||
return results;
|
||||
},
|
||||
|
||||
/**
|
||||
* helper function to return a node containing the
|
||||
* search summary for a given text. keywords is a list
|
||||
* of stemmed words, highlightWords is the list of normal, unstemmed
|
||||
* words. the first one is used to find the occurrence, the
|
||||
* latter for highlighting it.
|
||||
*/
|
||||
makeSearchSummary: (htmlText, keywords, highlightWords) => {
|
||||
const text = Search.htmlToText(htmlText);
|
||||
if (text === "") return null;
|
||||
|
||||
const textLower = text.toLowerCase();
|
||||
const actualStartPosition = [...keywords]
|
||||
.map((k) => textLower.indexOf(k.toLowerCase()))
|
||||
.filter((i) => i > -1)
|
||||
.slice(-1)[0];
|
||||
const startWithContext = Math.max(actualStartPosition - 120, 0);
|
||||
|
||||
const top = startWithContext === 0 ? "" : "...";
|
||||
const tail = startWithContext + 240 < text.length ? "..." : "";
|
||||
|
||||
let summary = document.createElement("p");
|
||||
summary.classList.add("context");
|
||||
summary.textContent = top + text.substr(startWithContext, 240).trim() + tail;
|
||||
|
||||
highlightWords.forEach((highlightWord) =>
|
||||
_highlightText(summary, highlightWord, "highlighted")
|
||||
);
|
||||
|
||||
return summary;
|
||||
},
|
||||
};
|
||||
|
||||
_ready(Search.init);
|
1
_static/torch-gt-v1.6.0-green.svg
Normal file
@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100" height="20" role="img" aria-label="torch: >= 1.6.0"><title>torch: >= 1.6.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="100" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="39" height="20" fill="#555"/><rect x="39" width="61" height="20" fill="#97ca00"/><rect width="100" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="205" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="290">torch</text><text x="205" y="140" transform="scale(.1)" fill="#fff" textLength="290">torch</text><text aria-hidden="true" x="685" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">>= 1.6.0</text><text x="685" y="140" transform="scale(.1)" fill="#fff" textLength="510">>= 1.6.0</text></g></svg>
|
After Width: | Height: | Size: 1.1 KiB |
2042
_static/underscore-1.13.1.js
Normal file
6
_static/underscore.js
Normal file
182
contributing/code-style.html
Normal file
@ -0,0 +1,182 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Follow the code style — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="How to create a recipe" href="how-to-create-a-recipe.html" />
|
||||
<link rel="prev" title="Contributing to Documentation" href="doc.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Contributing</a><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="doc.html">Contributing to Documentation</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Follow the code style</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html">How to create a recipe</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="index.html">Contributing</a> »</li>
|
||||
<li>Follow the code style</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/contributing/code-style.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="follow-the-code-style">
|
||||
<span id="id1"></span><h1>Follow the code style<a class="headerlink" href="#follow-the-code-style" title="Permalink to this heading"></a></h1>
|
||||
<p>We use the following tools to make the code style to be as consistent as possible:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><a class="reference external" href="https://github.com/psf/black">black</a>, to format the code</p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/PyCQA/flake8">flake8</a>, to check the style and quality of the code</p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/PyCQA/isort">isort</a>, to sort <code class="docutils literal notranslate"><span class="pre">imports</span></code></p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>The following versions of the above tools are used:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">black</span> <span class="pre">==</span> <span class="pre">12.6b0</span></code></p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">flake8</span> <span class="pre">==</span> <span class="pre">3.9.2</span></code></p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">isort</span> <span class="pre">==</span> <span class="pre">5.9.2</span></code></p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>After running the following commands:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ git clone https://github.com/k2-fsa/icefall
|
||||
$ cd icefall
|
||||
$ pip install pre-commit
|
||||
$ pre-commit install
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p>it will run the following checks whenever you run <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code>, <strong>automatically</strong>:</p>
|
||||
<blockquote>
|
||||
<div><figure class="align-center" id="id2">
|
||||
<a class="reference internal image-reference" href="../_images/pre-commit-check.png"><img alt="../_images/pre-commit-check.png" src="../_images/pre-commit-check.png" style="width: 600px;" /></a>
|
||||
<figcaption>
|
||||
<p><span class="caption-number">Fig. 7 </span><span class="caption-text">pre-commit hooks invoked by <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> (Failed).</span><a class="headerlink" href="#id2" title="Permalink to this image"></a></p>
|
||||
</figcaption>
|
||||
</figure>
|
||||
</div></blockquote>
|
||||
<p>If any of the above checks failed, your <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> was not successful.
|
||||
Please fix any issues reported by the check tools.</p>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>Some of the check tools, i.e., <code class="docutils literal notranslate"><span class="pre">black</span></code> and <code class="docutils literal notranslate"><span class="pre">isort</span></code> will modify
|
||||
the files to be commited <strong>in-place</strong>. So please run <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">status</span></code>
|
||||
after failure to see which file has been modified by the tools
|
||||
before you make any further changes.</p>
|
||||
</div>
|
||||
<p>After fixing all the failures, run <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> again and
|
||||
it should succeed this time:</p>
|
||||
<blockquote>
|
||||
<div><figure class="align-center" id="id3">
|
||||
<a class="reference internal image-reference" href="../_images/pre-commit-check-success.png"><img alt="../_images/pre-commit-check-success.png" src="../_images/pre-commit-check-success.png" style="width: 600px;" /></a>
|
||||
<figcaption>
|
||||
<p><span class="caption-number">Fig. 8 </span><span class="caption-text">pre-commit hooks invoked by <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code> (Succeeded).</span><a class="headerlink" href="#id3" title="Permalink to this image"></a></p>
|
||||
</figcaption>
|
||||
</figure>
|
||||
</div></blockquote>
|
||||
<p>If you want to check the style of your code before <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">commit</span></code>, you
|
||||
can do the following:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> icefall
|
||||
$ pip install <span class="nv">black</span><span class="o">==</span><span class="m">21</span>.6b0 <span class="nv">flake8</span><span class="o">==</span><span class="m">3</span>.9.2 <span class="nv">isort</span><span class="o">==</span><span class="m">5</span>.9.2
|
||||
$ black --check your_changed_file.py
|
||||
$ black your_changed_file.py <span class="c1"># modify it in-place</span>
|
||||
$
|
||||
$ flake8 your_changed_file.py
|
||||
$
|
||||
$ isort --check your_changed_file.py <span class="c1"># modify it in-place</span>
|
||||
$ isort your_changed_file.py
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="doc.html" class="btn btn-neutral float-left" title="Contributing to Documentation" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="how-to-create-a-recipe.html" class="btn btn-neutral float-right" title="How to create a recipe" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
156
contributing/doc.html
Normal file
@ -0,0 +1,156 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Contributing to Documentation — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Follow the code style" href="code-style.html" />
|
||||
<link rel="prev" title="Contributing" href="index.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Contributing</a><ul class="current">
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Contributing to Documentation</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="code-style.html">Follow the code style</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html">How to create a recipe</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="index.html">Contributing</a> »</li>
|
||||
<li>Contributing to Documentation</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/contributing/doc.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="contributing-to-documentation">
|
||||
<h1>Contributing to Documentation<a class="headerlink" href="#contributing-to-documentation" title="Permalink to this heading"></a></h1>
|
||||
<p>We use <a class="reference external" href="https://www.sphinx-doc.org/en/master/">sphinx</a>
|
||||
for documentation.</p>
|
||||
<p>Before writing documentation, you have to prepare the environment:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> docs
|
||||
$ pip install -r requirements.txt
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p>After setting up the environment, you are ready to write documentation.
|
||||
Please refer to <a class="reference external" href="https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html">reStructuredText Primer</a>
|
||||
if you are not familiar with <code class="docutils literal notranslate"><span class="pre">reStructuredText</span></code>.</p>
|
||||
<p>After writing some documentation, you can build the documentation <strong>locally</strong>
|
||||
to preview what it looks like if it is published:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> docs
|
||||
$ make html
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p>The generated documentation is in <code class="docutils literal notranslate"><span class="pre">docs/build/html</span></code> and can be viewed
|
||||
with the following commands:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> docs/build/html
|
||||
$ python3 -m http.server
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p>It will print:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Serving</span> <span class="n">HTTP</span> <span class="n">on</span> <span class="mf">0.0.0.0</span> <span class="n">port</span> <span class="mi">8000</span> <span class="p">(</span><span class="n">http</span><span class="p">:</span><span class="o">//</span><span class="mf">0.0.0.0</span><span class="p">:</span><span class="mi">8000</span><span class="o">/</span><span class="p">)</span> <span class="o">...</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>Open your browser, go to <a class="reference external" href="http://0.0.0.0:8000/">http://0.0.0.0:8000/</a>, and you will see
|
||||
the following:</p>
|
||||
<blockquote>
|
||||
<div><figure class="align-center" id="id1">
|
||||
<a class="reference internal image-reference" href="../_images/doc-contrib.png"><img alt="../_images/doc-contrib.png" src="../_images/doc-contrib.png" style="width: 600px;" /></a>
|
||||
<figcaption>
|
||||
<p><span class="caption-number">Fig. 6 </span><span class="caption-text">View generated documentation locally with <code class="docutils literal notranslate"><span class="pre">python3</span> <span class="pre">-m</span> <span class="pre">http.server</span></code>.</span><a class="headerlink" href="#id1" title="Permalink to this image"></a></p>
|
||||
</figcaption>
|
||||
</figure>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="index.html" class="btn btn-neutral float-left" title="Contributing" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="code-style.html" class="btn btn-neutral float-right" title="Follow the code style" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
258
contributing/how-to-create-a-recipe.html
Normal file
@ -0,0 +1,258 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>How to create a recipe — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Huggingface" href="../huggingface/index.html" />
|
||||
<link rel="prev" title="Follow the code style" href="code-style.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Contributing</a><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="doc.html">Contributing to Documentation</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="code-style.html">Follow the code style</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">How to create a recipe</a><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#data-preparation">Data Preparation</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#training">Training</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#decoding">Decoding</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#pre-trained-model">Pre-trained model</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="index.html">Contributing</a> »</li>
|
||||
<li>How to create a recipe</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/contributing/how-to-create-a-recipe.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="how-to-create-a-recipe">
|
||||
<h1>How to create a recipe<a class="headerlink" href="#how-to-create-a-recipe" title="Permalink to this heading"></a></h1>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>Please read <a class="reference internal" href="code-style.html#follow-the-code-style"><span class="std std-ref">Follow the code style</span></a> to adjust your code sytle.</p>
|
||||
</div>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">icefall</span></code> is designed to be as Pythonic as possible. Please use
|
||||
Python in your recipe if possible.</p>
|
||||
</div>
|
||||
<section id="data-preparation">
|
||||
<h2>Data Preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h2>
|
||||
<p>We recommend you to prepare your training/test/validate dataset
|
||||
with <a class="reference external" href="https://github.com/lhotse-speech/lhotse">lhotse</a>.</p>
|
||||
<p>Please refer to <a class="reference external" href="https://lhotse.readthedocs.io/en/latest/index.html">https://lhotse.readthedocs.io/en/latest/index.html</a>
|
||||
for how to create a recipe in <code class="docutils literal notranslate"><span class="pre">lhotse</span></code>.</p>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>The <code class="docutils literal notranslate"><span class="pre">yesno</span></code> recipe in <code class="docutils literal notranslate"><span class="pre">lhotse</span></code> is a very good example.</p>
|
||||
<p>Please refer to <a class="reference external" href="https://github.com/lhotse-speech/lhotse/pull/380">https://github.com/lhotse-speech/lhotse/pull/380</a>,
|
||||
which shows how to add a new recipe to <code class="docutils literal notranslate"><span class="pre">lhotse</span></code>.</p>
|
||||
</div>
|
||||
<p>Suppose you would like to add a recipe for a dataset named <code class="docutils literal notranslate"><span class="pre">foo</span></code>.
|
||||
You can do the following:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs
|
||||
$ mkdir -p foo/ASR
|
||||
$ cd foo/ASR
|
||||
$ touch prepare.sh
|
||||
$ chmod +x prepare.sh
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>If your dataset is very simple, please follow
|
||||
<a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/prepare.sh">egs/yesno/ASR/prepare.sh</a>
|
||||
to write your own <code class="docutils literal notranslate"><span class="pre">prepare.sh</span></code>.
|
||||
Otherwise, please refer to
|
||||
<a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/prepare.sh">egs/librispeech/ASR/prepare.sh</a>
|
||||
to prepare your data.</p>
|
||||
</section>
|
||||
<section id="training">
|
||||
<h2>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h2>
|
||||
<p>Assume you have a fancy model, called <code class="docutils literal notranslate"><span class="pre">bar</span></code> for the <code class="docutils literal notranslate"><span class="pre">foo</span></code> recipe, you can
|
||||
organize your files in the following way:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/foo/ASR
|
||||
$ mkdir bar
|
||||
$ cd bar
|
||||
$ touch README.md model.py train.py decode.py asr_datamodule.py pretrained.py
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>For instance , the <code class="docutils literal notranslate"><span class="pre">yesno</span></code> recipe has a <code class="docutils literal notranslate"><span class="pre">tdnn</span></code> model and its directory structure
|
||||
looks like the following:</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>egs/yesno/ASR/tdnn/
|
||||
<span class="p">|</span>-- README.md
|
||||
<span class="p">|</span>-- asr_datamodule.py
|
||||
<span class="p">|</span>-- decode.py
|
||||
<span class="p">|</span>-- model.py
|
||||
<span class="p">|</span>-- pretrained.py
|
||||
<span class="sb">`</span>-- train.py
|
||||
</pre></div>
|
||||
</div>
|
||||
<p><strong>File description</strong>:</p>
|
||||
<blockquote>
|
||||
<div><ul>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">README.md</span></code></p>
|
||||
<p>It contains information of this recipe, e.g., how to run it, what the WER is, etc.</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">asr_datamodule.py</span></code></p>
|
||||
<p>It provides code to create PyTorch dataloaders with train/test/validation dataset.</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">decode.py</span></code></p>
|
||||
<p>It takes as inputs the checkpoints saved during the training stage to decode the test
|
||||
dataset(s).</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">model.py</span></code></p>
|
||||
<p>It contains the definition of your fancy neural network model.</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">pretrained.py</span></code></p>
|
||||
<p>We can use this script to do inference with a pre-trained model.</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">train.py</span></code></p>
|
||||
<p>It contains training code.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>Please take a look at</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn">egs/yesno/tdnn</a></p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/tdnn_lstm_ctc">egs/librispeech/tdnn_lstm_ctc</a></p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conformer_ctc">egs/librispeech/conformer_ctc</a></p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>to get a feel what the resulting files look like.</p>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>Every model in a recipe is kept to be as self-contained as possible.
|
||||
We tolerate duplicate code among different recipes.</p>
|
||||
</div>
|
||||
<p>The training stage should be invocable by:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/foo/ASR
|
||||
$ ./bar/train.py
|
||||
$ ./bar/train.py --help
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="decoding">
|
||||
<h2>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h2>
|
||||
<p>Please refer to</p>
|
||||
<blockquote>
|
||||
<div><ul>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/decode.py">https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/conformer_ctc/decode.py</a></p>
|
||||
<p>If your model is transformer/conformer based.</p>
|
||||
</li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/tdnn_lstm_ctc/decode.py">https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/tdnn_lstm_ctc/decode.py</a></p>
|
||||
<p>If your model is TDNN/LSTM based, i.e., there is no attention decoder.</p>
|
||||
</li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/tdnn/decode.py">https://github.com/k2-fsa/icefall/blob/master/egs/yesno/ASR/tdnn/decode.py</a></p>
|
||||
<p>If there is no LM rescoring.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>The decoding stage should be invocable by:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/foo/ASR
|
||||
$ ./bar/decode.py
|
||||
$ ./bar/decode.py --help
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="pre-trained-model">
|
||||
<h2>Pre-trained model<a class="headerlink" href="#pre-trained-model" title="Permalink to this heading"></a></h2>
|
||||
<p>Please demonstrate how to use your model for inference in <code class="docutils literal notranslate"><span class="pre">egs/foo/ASR/bar/pretrained.py</span></code>.
|
||||
If possible, please consider creating a Colab notebook to show that.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="code-style.html" class="btn btn-neutral float-left" title="Follow the code style" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="../huggingface/index.html" class="btn btn-neutral float-right" title="Huggingface" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
148
contributing/index.html
Normal file
@ -0,0 +1,148 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Contributing — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Contributing to Documentation" href="doc.html" />
|
||||
<link rel="prev" title="TDNN-CTC" href="../recipes/yesno/tdnn.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">Contributing</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="doc.html">Contributing to Documentation</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="code-style.html">Follow the code style</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html">How to create a recipe</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li>Contributing</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/contributing/index.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="contributing">
|
||||
<h1>Contributing<a class="headerlink" href="#contributing" title="Permalink to this heading"></a></h1>
|
||||
<p>Contributions to <code class="docutils literal notranslate"><span class="pre">icefall</span></code> are very welcomed.
|
||||
There are many possible ways to make contributions and
|
||||
two of them are:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p>To write documentation</p></li>
|
||||
<li><p>To write code</p>
|
||||
<ul>
|
||||
<li><ol class="arabic simple">
|
||||
<li><p>To follow the code style in the repository</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="arabic simple" start="2">
|
||||
<li><p>To write a new recipe</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>In this page, we describe how to contribute documentation
|
||||
and code to <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="doc.html">Contributing to Documentation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="code-style.html">Follow the code style</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="how-to-create-a-recipe.html">How to create a recipe</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html#data-preparation">Data Preparation</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html#training">Training</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html#decoding">Decoding</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="how-to-create-a-recipe.html#pre-trained-model">Pre-trained model</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="../recipes/yesno/tdnn.html" class="btn btn-neutral float-left" title="TDNN-CTC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="doc.html" class="btn btn-neutral float-right" title="Contributing to Documentation" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
105
genindex.html
Normal file
@ -0,0 +1,105 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Index — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||
<script src="_static/jquery.js"></script>
|
||||
<script src="_static/underscore.js"></script>
|
||||
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="_static/doctools.js"></script>
|
||||
<script src="_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="#" />
|
||||
<link rel="search" title="Search" href="search.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="index.html" class="icon icon-home"></a> »</li>
|
||||
<li>Index</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/genindex" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
|
||||
<h1 id="index">Index</h1>
|
||||
|
||||
<div class="genindex-jumpbox">
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
125
huggingface/index.html
Normal file
@ -0,0 +1,125 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Huggingface — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Pre-trained models" href="pretrained-models.html" />
|
||||
<link rel="prev" title="How to create a recipe" href="../contributing/how-to-create-a-recipe.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">Huggingface</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="pretrained-models.html">Pre-trained models</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="spaces.html">Huggingface spaces</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li>Huggingface</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/huggingface/index.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="huggingface">
|
||||
<h1>Huggingface<a class="headerlink" href="#huggingface" title="Permalink to this heading"></a></h1>
|
||||
<p>This section describes how to find pre-trained models.
|
||||
It also demonstrates how to try them from within your browser
|
||||
without installing anything by using
|
||||
<a class="reference external" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition">Huggingface spaces</a>.</p>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="pretrained-models.html">Pre-trained models</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="spaces.html">Huggingface spaces</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="spaces.html#youtube-video">YouTube Video</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="../contributing/how-to-create-a-recipe.html" class="btn btn-neutral float-left" title="How to create a recipe" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="pretrained-models.html" class="btn btn-neutral float-right" title="Pre-trained models" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
127
huggingface/pretrained-models.html
Normal file
@ -0,0 +1,127 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Pre-trained models — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Huggingface spaces" href="spaces.html" />
|
||||
<link rel="prev" title="Huggingface" href="index.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Huggingface</a><ul class="current">
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Pre-trained models</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="spaces.html">Huggingface spaces</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="index.html">Huggingface</a> »</li>
|
||||
<li>Pre-trained models</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/huggingface/pretrained-models.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="pre-trained-models">
|
||||
<h1>Pre-trained models<a class="headerlink" href="#pre-trained-models" title="Permalink to this heading"></a></h1>
|
||||
<p>We have uploaded pre-trained models for all recipes in <code class="docutils literal notranslate"><span class="pre">icefall</span></code>
|
||||
to <a class="reference external" href="https://huggingface.co/">https://huggingface.co/</a>.</p>
|
||||
<p>You can find them by visiting the following link:</p>
|
||||
<p><a class="reference external" href="https://huggingface.co/models?search=icefall">https://huggingface.co/models?search=icefall</a>.</p>
|
||||
<p>You can also find links of pre-trained models for a specific recipe
|
||||
by looking at the corresponding <code class="docutils literal notranslate"><span class="pre">RESULTS.md</span></code>. For instance:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md">https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md</a></p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/RESULTS.md">https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/RESULTS.md</a></p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/gigaspeech/ASR/RESULTS.md">https://github.com/k2-fsa/icefall/blob/master/egs/gigaspeech/ASR/RESULTS.md</a></p></li>
|
||||
<li><p><a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/wenetspeech/ASR/RESULTS.md">https://github.com/k2-fsa/icefall/blob/master/egs/wenetspeech/ASR/RESULTS.md</a></p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="index.html" class="btn btn-neutral float-left" title="Huggingface" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="spaces.html" class="btn btn-neutral float-right" title="Huggingface spaces" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
162
huggingface/spaces.html
Normal file
@ -0,0 +1,162 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Huggingface spaces — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="prev" title="Pre-trained models" href="pretrained-models.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="index.html">Huggingface</a><ul class="current">
|
||||
<li class="toctree-l2"><a class="reference internal" href="pretrained-models.html">Pre-trained models</a></li>
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">Huggingface spaces</a><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#youtube-video">YouTube Video</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="index.html">Huggingface</a> »</li>
|
||||
<li>Huggingface spaces</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/huggingface/spaces.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="huggingface-spaces">
|
||||
<h1>Huggingface spaces<a class="headerlink" href="#huggingface-spaces" title="Permalink to this heading"></a></h1>
|
||||
<p>We have integrated the server framework
|
||||
<a class="reference external" href="http://github.com/k2-fsa/sherpa">sherpa</a>
|
||||
with <a class="reference external" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition">Huggingface spaces</a>
|
||||
so that you can try pre-trained models from within your browser
|
||||
without the need to download or install anything.</p>
|
||||
<p>All you need is a browser, which can be run on Windows, macOS, Linux, or even on your
|
||||
iPad and your phone.</p>
|
||||
<p>Start your browser and visit the following address:</p>
|
||||
<p><a class="reference external" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition">https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition</a></p>
|
||||
<p>and you will see a page like the following screenshot:</p>
|
||||
<a class="reference external image-reference" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition"><img alt="screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_" src="../_images/hugging-face-sherpa.png" /></a>
|
||||
<p>You can:</p>
|
||||
<blockquote>
|
||||
<div><ol class="arabic simple">
|
||||
<li><p>Select a language for recognition. Currently, we provide pre-trained models
|
||||
from <code class="docutils literal notranslate"><span class="pre">icefall</span></code> for the following languages: <code class="docutils literal notranslate"><span class="pre">Chinese</span></code>, <code class="docutils literal notranslate"><span class="pre">English</span></code>, and
|
||||
<code class="docutils literal notranslate"><span class="pre">Chinese+English</span></code>.</p></li>
|
||||
<li><p>After selecting the target language, you can select a pre-trained model
|
||||
corresponding to the language.</p></li>
|
||||
<li><p>Select the decoding method. Currently, it provides <code class="docutils literal notranslate"><span class="pre">greedy</span> <span class="pre">search</span></code>
|
||||
and <code class="docutils literal notranslate"><span class="pre">modified_beam_search</span></code>.</p></li>
|
||||
<li><p>If you selected <code class="docutils literal notranslate"><span class="pre">modified_beam_search</span></code>, you can choose the number of
|
||||
active paths during the search.</p></li>
|
||||
<li><p>Either upload a file or record your speech for recognition.</p></li>
|
||||
<li><p>Click the button <code class="docutils literal notranslate"><span class="pre">Submit</span> <span class="pre">for</span> <span class="pre">recognition</span></code>.</p></li>
|
||||
<li><p>Wait for a moment and you will get the recognition results.</p></li>
|
||||
</ol>
|
||||
</div></blockquote>
|
||||
<p>The following screenshot shows an example when selecting <code class="docutils literal notranslate"><span class="pre">Chinese+English</span></code>:</p>
|
||||
<a class="reference external image-reference" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition"><img alt="screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_" src="../_images/hugging-face-sherpa-3.png" /></a>
|
||||
<p>In the bottom part of the page, you can find a table of examples. You can click
|
||||
one of them and then click <code class="docutils literal notranslate"><span class="pre">Submit</span> <span class="pre">for</span> <span class="pre">recognition</span></code>.</p>
|
||||
<a class="reference external image-reference" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition"><img alt="screenshot of `<https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition>`_" src="../_images/hugging-face-sherpa-2.png" /></a>
|
||||
<section id="youtube-video">
|
||||
<h2>YouTube Video<a class="headerlink" href="#youtube-video" title="Permalink to this heading"></a></h2>
|
||||
<p>We provide the following YouTube video demonstrating how to use
|
||||
<a class="reference external" href="https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition">https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition</a>.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>To get the latest news of <a class="reference external" href="https://github.com/k2-fsa">next-gen Kaldi</a>, please subscribe
|
||||
the following YouTube channel by <a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">Nadira Povey</a>:</p>
|
||||
<blockquote>
|
||||
<div><p><a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw</a></p>
|
||||
</div></blockquote>
|
||||
</div>
|
||||
<div class="video_wrapper" style="">
|
||||
<iframe allowfullscreen="true" src="https://www.youtube.com/embed/ElN3r9dkKE4" style="border: 0; height: 345px; width: 560px">
|
||||
</iframe></div></section>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="pretrained-models.html" class="btn btn-neutral float-left" title="Pre-trained models" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
142
index.html
Normal file
@ -0,0 +1,142 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Icefall — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
|
||||
<script src="_static/jquery.js"></script>
|
||||
<script src="_static/underscore.js"></script>
|
||||
<script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="_static/doctools.js"></script>
|
||||
<script src="_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="genindex.html" />
|
||||
<link rel="search" title="Search" href="search.html" />
|
||||
<link rel="next" title="Installation" href="installation/index.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="#" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="#">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="#" class="icon icon-home"></a> »</li>
|
||||
<li>Icefall</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/index.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="icefall">
|
||||
<h1>Icefall<a class="headerlink" href="#icefall" title="Permalink to this heading"></a></h1>
|
||||
<a class="reference external image-reference" href="https://github.com/k2-fsa/icefall"><img alt="icefall logo" class="align-center" src="_images/logo.png" style="width: 168px;" /></a>
|
||||
<p>Documentation for <a class="reference external" href="https://github.com/k2-fsa/icefall">icefall</a>, containing
|
||||
speech recognition recipes using <a class="reference external" href="https://github.com/k2-fsa/k2">k2</a>.</p>
|
||||
<div class="toctree-wrapper compound">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="installation/index.html">Installation</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#install-pytorch-and-torchaudio">(0) Install PyTorch and torchaudio</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#install-k2">(1) Install k2</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#install-lhotse">(2) Install lhotse</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#download-icefall">(3) Download icefall</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#installation-example">Installation example</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#test-your-installation">Test Your Installation</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="installation/index.html#youtube-video">YouTube Video</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="recipes/index.html">Recipes</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="recipes/aishell/index.html">aishell</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="recipes/librispeech/index.html">LibriSpeech</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="recipes/timit/index.html">TIMIT</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="recipes/yesno/index.html">YesNo</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="contributing/index.html">Contributing</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="contributing/doc.html">Contributing to Documentation</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="contributing/code-style.html">Follow the code style</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="contributing/how-to-create-a-recipe.html">How to create a recipe</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="huggingface/index.html">Huggingface</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="huggingface/pretrained-models.html">Pre-trained models</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="huggingface/spaces.html">Huggingface spaces</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="installation/index.html" class="btn btn-neutral float-right" title="Installation" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
575
installation/index.html
Normal file
@ -0,0 +1,575 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Installation — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
|
||||
<script src="../_static/jquery.js"></script>
|
||||
<script src="../_static/underscore.js"></script>
|
||||
<script src="../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../_static/doctools.js"></script>
|
||||
<script src="../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../genindex.html" />
|
||||
<link rel="search" title="Search" href="../search.html" />
|
||||
<link rel="next" title="Recipes" href="../recipes/index.html" />
|
||||
<link rel="prev" title="Icefall" href="../index.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1 current"><a class="current reference internal" href="#">Installation</a><ul>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#install-pytorch-and-torchaudio">(0) Install PyTorch and torchaudio</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#install-k2">(1) Install k2</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#install-lhotse">(2) Install lhotse</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#download-icefall">(3) Download icefall</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#installation-example">Installation example</a><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#create-a-virtual-environment">(1) Create a virtual environment</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#activate-your-virtual-environment">(2) Activate your virtual environment</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#id1">(3) Install k2</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#id2">(4) Install lhotse</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#id3">(5) Download icefall</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#test-your-installation">Test Your Installation</a><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#data-preparation">Data preparation</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#training">Training</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="#decoding">Decoding</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="#youtube-video">YouTube Video</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../recipes/index.html">Recipes</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../index.html" class="icon icon-home"></a> »</li>
|
||||
<li>Installation</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/installation/index.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="installation">
|
||||
<span id="install-icefall"></span><h1>Installation<a class="headerlink" href="#installation" title="Permalink to this heading"></a></h1>
|
||||
<ul class="simple">
|
||||
<li><p><img alt="Supported operating systems" src="../_images/os-Linux_macOS-ff69b4.svg" /></p></li>
|
||||
<li><p><img alt="Supported devices" src="../_images/device-CPU_CUDA-orange.svg" /></p></li>
|
||||
<li><p><img alt="Supported python versions" src="../_images/python-gt-v3.6-blue.svg" /></p></li>
|
||||
<li><p><img alt="Supported PyTorch versions" src="../_images/torch-gt-v1.6.0-green.svg" /></p></li>
|
||||
<li><p><img alt="Supported k2 versions" src="../_images/k2-gt-v1.9-blueviolet.svg" /></p></li>
|
||||
</ul>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">icefall</span></code> depends on <a class="reference external" href="https://github.com/k2-fsa/k2">k2</a> and
|
||||
<a class="reference external" href="https://github.com/lhotse-speech/lhotse">lhotse</a>.</p>
|
||||
<p>We recommend you to use the following steps to install the dependencies.</p>
|
||||
<ul class="simple">
|
||||
<li><ol class="arabic simple" start="0">
|
||||
<li><p>Install PyTorch and torchaudio</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="arabic simple">
|
||||
<li><p>Install k2</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="arabic simple" start="2">
|
||||
<li><p>Install lhotse</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
</ul>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>Installation order matters.</p>
|
||||
</div>
|
||||
<section id="install-pytorch-and-torchaudio">
|
||||
<h2>(0) Install PyTorch and torchaudio<a class="headerlink" href="#install-pytorch-and-torchaudio" title="Permalink to this heading"></a></h2>
|
||||
<p>Please refer <a class="reference external" href="https://pytorch.org/">https://pytorch.org/</a> to install PyTorch
|
||||
and torchaudio.</p>
|
||||
</section>
|
||||
<section id="install-k2">
|
||||
<h2>(1) Install k2<a class="headerlink" href="#install-k2" title="Permalink to this heading"></a></h2>
|
||||
<p>Please refer to <a class="reference external" href="https://k2-fsa.github.io/k2/installation/index.html">https://k2-fsa.github.io/k2/installation/index.html</a>
|
||||
to install <code class="docutils literal notranslate"><span class="pre">k2</span></code>.</p>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>You need to install <code class="docutils literal notranslate"><span class="pre">k2</span></code> with a version at least <strong>v1.9</strong>.</p>
|
||||
</div>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>If you have already installed PyTorch and don’t want to replace it,
|
||||
please install a version of <code class="docutils literal notranslate"><span class="pre">k2</span></code> that is compiled against the version
|
||||
of PyTorch you are using.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="install-lhotse">
|
||||
<h2>(2) Install lhotse<a class="headerlink" href="#install-lhotse" title="Permalink to this heading"></a></h2>
|
||||
<p>Please refer to <a class="reference external" href="https://lhotse.readthedocs.io/en/latest/getting-started.html#installation">https://lhotse.readthedocs.io/en/latest/getting-started.html#installation</a>
|
||||
to install <code class="docutils literal notranslate"><span class="pre">lhotse</span></code>.</p>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>We strongly recommend you to use:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">pip</span> <span class="n">install</span> <span class="n">git</span><span class="o">+</span><span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">github</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">lhotse</span><span class="o">-</span><span class="n">speech</span><span class="o">/</span><span class="n">lhotse</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>to install the latest version of lhotse.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="download-icefall">
|
||||
<h2>(3) Download icefall<a class="headerlink" href="#download-icefall" title="Permalink to this heading"></a></h2>
|
||||
<p><code class="docutils literal notranslate"><span class="pre">icefall</span></code> is a collection of Python scripts; what you need is to download it
|
||||
and set the environment variable <code class="docutils literal notranslate"><span class="pre">PYTHONPATH</span></code> to point to it.</p>
|
||||
<p>Assume you want to place <code class="docutils literal notranslate"><span class="pre">icefall</span></code> in the folder <code class="docutils literal notranslate"><span class="pre">/tmp</span></code>. The
|
||||
following commands show you how to setup <code class="docutils literal notranslate"><span class="pre">icefall</span></code>:</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="nb">cd</span> /tmp
|
||||
git clone https://github.com/k2-fsa/icefall
|
||||
<span class="nb">cd</span> icefall
|
||||
pip install -r requirements.txt
|
||||
<span class="nb">export</span> <span class="nv">PYTHONPATH</span><span class="o">=</span>/tmp/icefall:<span class="nv">$PYTHONPATH</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>You can put several versions of <code class="docutils literal notranslate"><span class="pre">icefall</span></code> in the same virtual environment.
|
||||
To switch among different versions of <code class="docutils literal notranslate"><span class="pre">icefall</span></code>, just set <code class="docutils literal notranslate"><span class="pre">PYTHONPATH</span></code>
|
||||
to point to the version you want.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="installation-example">
|
||||
<h2>Installation example<a class="headerlink" href="#installation-example" title="Permalink to this heading"></a></h2>
|
||||
<p>The following shows an example about setting up the environment.</p>
|
||||
<section id="create-a-virtual-environment">
|
||||
<h3>(1) Create a virtual environment<a class="headerlink" href="#create-a-virtual-environment" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ virtualenv -p python3.8 test-icefall
|
||||
|
||||
created virtual environment CPython3.8.6.final.0-64 <span class="k">in</span> 1540ms
|
||||
creator CPython3Posix<span class="o">(</span><span class="nv">dest</span><span class="o">=</span>/ceph-fj/fangjun/test-icefall, <span class="nv">clear</span><span class="o">=</span>False, <span class="nv">no_vcs_ignore</span><span class="o">=</span>False, <span class="nv">global</span><span class="o">=</span>False<span class="o">)</span>
|
||||
seeder FromAppData<span class="o">(</span><span class="nv">download</span><span class="o">=</span>False, <span class="nv">pip</span><span class="o">=</span>bundle, <span class="nv">setuptools</span><span class="o">=</span>bundle, <span class="nv">wheel</span><span class="o">=</span>bundle, <span class="nv">via</span><span class="o">=</span>copy, <span class="nv">app_data_dir</span><span class="o">=</span>/root/fangjun/.local/share/v
|
||||
irtualenv<span class="o">)</span>
|
||||
added seed packages: <span class="nv">pip</span><span class="o">==</span><span class="m">21</span>.1.3, <span class="nv">setuptools</span><span class="o">==</span><span class="m">57</span>.4.0, <span class="nv">wheel</span><span class="o">==</span><span class="m">0</span>.36.2
|
||||
activators BashActivator,CShellActivator,FishActivator,PowerShellActivator,PythonActivator,XonshActivator
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="activate-your-virtual-environment">
|
||||
<h3>(2) Activate your virtual environment<a class="headerlink" href="#activate-your-virtual-environment" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">source</span> test-icefall/bin/activate
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="id1">
|
||||
<h3>(3) Install k2<a class="headerlink" href="#id1" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ pip install <span class="nv">k2</span><span class="o">==</span><span class="m">1</span>.4.dev20210822+cpu.torch1.9.0 -f https://k2-fsa.org/nightly/index.html
|
||||
|
||||
Looking <span class="k">in</span> links: https://k2-fsa.org/nightly/index.html
|
||||
Collecting <span class="nv">k2</span><span class="o">==</span><span class="m">1</span>.4.dev20210822+cpu.torch1.9.0
|
||||
Downloading https://k2-fsa.org/nightly/whl/k2-1.4.dev20210822%2Bcpu.torch1.9.0-cp38-cp38-linux_x86_64.whl <span class="o">(</span><span class="m">1</span>.6 MB<span class="o">)</span>
|
||||
<span class="p">|</span>________________________________<span class="p">|</span> <span class="m">1</span>.6 MB <span class="m">185</span> kB/s
|
||||
Collecting graphviz
|
||||
Downloading graphviz-0.17-py3-none-any.whl <span class="o">(</span><span class="m">18</span> kB<span class="o">)</span>
|
||||
Collecting <span class="nv">torch</span><span class="o">==</span><span class="m">1</span>.9.0
|
||||
Using cached torch-1.9.0-cp38-cp38-manylinux1_x86_64.whl <span class="o">(</span><span class="m">831</span>.4 MB<span class="o">)</span>
|
||||
Collecting typing-extensions
|
||||
Using cached typing_extensions-3.10.0.0-py3-none-any.whl <span class="o">(</span><span class="m">26</span> kB<span class="o">)</span>
|
||||
Installing collected packages: typing-extensions, torch, graphviz, k2
|
||||
Successfully installed graphviz-0.17 k2-1.4.dev20210822+cpu.torch1.9.0 torch-1.9.0 typing-extensions-3.10.0.0
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition warning">
|
||||
<p class="admonition-title">Warning</p>
|
||||
<p>We choose to install a CPU version of k2 for testing. You would probably want to install
|
||||
a CUDA version of k2.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="id2">
|
||||
<h3>(4) Install lhotse<a class="headerlink" href="#id2" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ pip install git+https://github.com/lhotse-speech/lhotse
|
||||
|
||||
Collecting git+https://github.com/lhotse-speech/lhotse
|
||||
Cloning https://github.com/lhotse-speech/lhotse to /tmp/pip-req-build-7b1b76ge
|
||||
Running command git clone -q https://github.com/lhotse-speech/lhotse /tmp/pip-req-build-7b1b76ge
|
||||
Collecting audioread>=2.1.9
|
||||
Using cached audioread-2.1.9-py3-none-any.whl
|
||||
Collecting SoundFile>=0.10
|
||||
Using cached SoundFile-0.10.3.post1-py2.py3-none-any.whl (21 kB)
|
||||
Collecting click>=7.1.1
|
||||
Using cached click-8.0.1-py3-none-any.whl (97 kB)
|
||||
Collecting cytoolz>=0.10.1
|
||||
Using cached cytoolz-0.11.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.9 MB)
|
||||
Collecting dataclasses
|
||||
Using cached dataclasses-0.6-py3-none-any.whl (14 kB)
|
||||
Collecting h5py>=2.10.0
|
||||
Downloading h5py-3.4.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.5 MB)
|
||||
|________________________________| 4.5 MB 684 kB/s
|
||||
Collecting intervaltree>=3.1.0
|
||||
Using cached intervaltree-3.1.0-py2.py3-none-any.whl
|
||||
Collecting lilcom>=1.1.0
|
||||
Using cached lilcom-1.1.1-cp38-cp38-linux_x86_64.whl
|
||||
Collecting numpy>=1.18.1
|
||||
Using cached numpy-1.21.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.8 MB)
|
||||
Collecting packaging
|
||||
Using cached packaging-21.0-py3-none-any.whl (40 kB)
|
||||
Collecting pyyaml>=5.3.1
|
||||
Using cached PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl (662 kB)
|
||||
Collecting tqdm
|
||||
Downloading tqdm-4.62.1-py2.py3-none-any.whl (76 kB)
|
||||
|________________________________| 76 kB 2.7 MB/s
|
||||
Collecting torchaudio==0.9.0
|
||||
Downloading torchaudio-0.9.0-cp38-cp38-manylinux1_x86_64.whl (1.9 MB)
|
||||
|________________________________| 1.9 MB 73.1 MB/s
|
||||
Requirement already satisfied: torch==1.9.0 in ./test-icefall/lib/python3.8/site-packages (from torchaudio==0.9.0->lhotse===0.8.0.dev
|
||||
-2a1410b-clean) (1.9.0)
|
||||
Requirement already satisfied: typing-extensions in ./test-icefall/lib/python3.8/site-packages (from torch==1.9.0->torchaudio==0.9.0-
|
||||
>lhotse===0.8.0.dev-2a1410b-clean) (3.10.0.0)
|
||||
Collecting toolz>=0.8.0
|
||||
Using cached toolz-0.11.1-py3-none-any.whl (55 kB)
|
||||
Collecting sortedcontainers<3.0,>=2.0
|
||||
Using cached sortedcontainers-2.4.0-py2.py3-none-any.whl (29 kB)
|
||||
Collecting cffi>=1.0
|
||||
Using cached cffi-1.14.6-cp38-cp38-manylinux1_x86_64.whl (411 kB)
|
||||
Collecting pycparser
|
||||
Using cached pycparser-2.20-py2.py3-none-any.whl (112 kB)
|
||||
Collecting pyparsing>=2.0.2
|
||||
Using cached pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)
|
||||
Building wheels for collected packages: lhotse
|
||||
Building wheel for lhotse (setup.py) ... done
|
||||
Created wheel for lhotse: filename=lhotse-0.8.0.dev_2a1410b_clean-py3-none-any.whl size=342242 sha256=f683444afa4dc0881133206b4646a
|
||||
9d0f774224cc84000f55d0a67f6e4a37997
|
||||
Stored in directory: /tmp/pip-ephem-wheel-cache-ftu0qysz/wheels/7f/7a/8e/a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f
|
||||
WARNING: Built wheel for lhotse is invalid: Metadata 1.2 mandates PEP 440 version, but '0.8.0.dev-2a1410b-clean' is not
|
||||
Failed to build lhotse
|
||||
Installing collected packages: pycparser, toolz, sortedcontainers, pyparsing, numpy, cffi, tqdm, torchaudio, SoundFile, pyyaml, packa
|
||||
ging, lilcom, intervaltree, h5py, dataclasses, cytoolz, click, audioread, lhotse
|
||||
Running setup.py install for lhotse ... done
|
||||
DEPRECATION: lhotse was installed using the legacy 'setup.py install' method, because a wheel could not be built for it. A possible
|
||||
replacement is to fix the wheel build issue reported above. You can find discussion regarding this at https://github.com/pypa/pip/is
|
||||
sues/8368.
|
||||
Successfully installed SoundFile-0.10.3.post1 audioread-2.1.9 cffi-1.14.6 click-8.0.1 cytoolz-0.11.0 dataclasses-0.6 h5py-3.4.0 inter
|
||||
valtree-3.1.0 lhotse-0.8.0.dev-2a1410b-clean lilcom-1.1.1 numpy-1.21.2 packaging-21.0 pycparser-2.20 pyparsing-2.4.7 pyyaml-5.4.1 sor
|
||||
tedcontainers-2.4.0 toolz-0.11.1 torchaudio-0.9.0 tqdm-4.62.1
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="id3">
|
||||
<h3>(5) Download icefall<a class="headerlink" href="#id3" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd /tmp
|
||||
$ git clone https://github.com/k2-fsa/icefall
|
||||
|
||||
Cloning into 'icefall'...
|
||||
remote: Enumerating objects: 500, done.
|
||||
remote: Counting objects: 100% (500/500), done.
|
||||
remote: Compressing objects: 100% (308/308), done.
|
||||
remote: Total 500 (delta 263), reused 307 (delta 102), pack-reused 0
|
||||
Receiving objects: 100% (500/500), 172.49 KiB | 385.00 KiB/s, done.
|
||||
Resolving deltas: 100% (263/263), done.
|
||||
|
||||
$ cd icefall
|
||||
$ pip install -r requirements.txt
|
||||
|
||||
Collecting kaldilm
|
||||
Downloading kaldilm-1.8.tar.gz (48 kB)
|
||||
|________________________________| 48 kB 574 kB/s
|
||||
Collecting kaldialign
|
||||
Using cached kaldialign-0.2-cp38-cp38-linux_x86_64.whl
|
||||
Collecting sentencepiece>=0.1.96
|
||||
Using cached sentencepiece-0.1.96-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
|
||||
Collecting tensorboard
|
||||
Using cached tensorboard-2.6.0-py3-none-any.whl (5.6 MB)
|
||||
Requirement already satisfied: setuptools>=41.0.0 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r
|
||||
requirements.txt (line 4)) (57.4.0)
|
||||
Collecting absl-py>=0.4
|
||||
Using cached absl_py-0.13.0-py3-none-any.whl (132 kB)
|
||||
Collecting google-auth-oauthlib<0.5,>=0.4.1
|
||||
Using cached google_auth_oauthlib-0.4.5-py2.py3-none-any.whl (18 kB)
|
||||
Collecting grpcio>=1.24.3
|
||||
Using cached grpcio-1.39.0-cp38-cp38-manylinux2014_x86_64.whl (4.3 MB)
|
||||
Requirement already satisfied: wheel>=0.26 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r require
|
||||
ments.txt (line 4)) (0.36.2)
|
||||
Requirement already satisfied: numpy>=1.12.0 in /ceph-fj/fangjun/test-icefall/lib/python3.8/site-packages (from tensorboard->-r requi
|
||||
rements.txt (line 4)) (1.21.2)
|
||||
Collecting protobuf>=3.6.0
|
||||
Using cached protobuf-3.17.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.0 MB)
|
||||
Collecting werkzeug>=0.11.15
|
||||
Using cached Werkzeug-2.0.1-py3-none-any.whl (288 kB)
|
||||
Collecting tensorboard-data-server<0.7.0,>=0.6.0
|
||||
Using cached tensorboard_data_server-0.6.1-py3-none-manylinux2010_x86_64.whl (4.9 MB)
|
||||
Collecting google-auth<2,>=1.6.3
|
||||
Downloading google_auth-1.35.0-py2.py3-none-any.whl (152 kB)
|
||||
|________________________________| 152 kB 1.4 MB/s
|
||||
Collecting requests<3,>=2.21.0
|
||||
Using cached requests-2.26.0-py2.py3-none-any.whl (62 kB)
|
||||
Collecting tensorboard-plugin-wit>=1.6.0
|
||||
Using cached tensorboard_plugin_wit-1.8.0-py3-none-any.whl (781 kB)
|
||||
Collecting markdown>=2.6.8
|
||||
Using cached Markdown-3.3.4-py3-none-any.whl (97 kB)
|
||||
Collecting six
|
||||
Using cached six-1.16.0-py2.py3-none-any.whl (11 kB)
|
||||
Collecting cachetools<5.0,>=2.0.0
|
||||
Using cached cachetools-4.2.2-py3-none-any.whl (11 kB)
|
||||
Collecting rsa<5,>=3.1.4
|
||||
Using cached rsa-4.7.2-py3-none-any.whl (34 kB)
|
||||
Collecting pyasn1-modules>=0.2.1
|
||||
Using cached pyasn1_modules-0.2.8-py2.py3-none-any.whl (155 kB)
|
||||
Collecting requests-oauthlib>=0.7.0
|
||||
Using cached requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
|
||||
Collecting pyasn1<0.5.0,>=0.4.6
|
||||
Using cached pyasn1-0.4.8-py2.py3-none-any.whl (77 kB)
|
||||
Collecting urllib3<1.27,>=1.21.1
|
||||
Using cached urllib3-1.26.6-py2.py3-none-any.whl (138 kB)
|
||||
Collecting certifi>=2017.4.17
|
||||
Using cached certifi-2021.5.30-py2.py3-none-any.whl (145 kB)
|
||||
Collecting charset-normalizer~=2.0.0
|
||||
Using cached charset_normalizer-2.0.4-py3-none-any.whl (36 kB)
|
||||
Collecting idna<4,>=2.5
|
||||
Using cached idna-3.2-py3-none-any.whl (59 kB)
|
||||
Collecting oauthlib>=3.0.0
|
||||
Using cached oauthlib-3.1.1-py2.py3-none-any.whl (146 kB)
|
||||
Building wheels for collected packages: kaldilm
|
||||
Building wheel for kaldilm (setup.py) ... done
|
||||
Created wheel for kaldilm: filename=kaldilm-1.8-cp38-cp38-linux_x86_64.whl size=897233 sha256=eccb906cafcd45bf9a7e1a1718e4534254bfb
|
||||
f4c0d0cbc66eee6c88d68a63862
|
||||
Stored in directory: /root/fangjun/.cache/pip/wheels/85/7d/63/f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9
|
||||
Successfully built kaldilm
|
||||
Installing collected packages: urllib3, pyasn1, idna, charset-normalizer, certifi, six, rsa, requests, pyasn1-modules, oauthlib, cach
|
||||
etools, requests-oauthlib, google-auth, werkzeug, tensorboard-plugin-wit, tensorboard-data-server, protobuf, markdown, grpcio, google
|
||||
-auth-oauthlib, absl-py, tensorboard, sentencepiece, kaldilm, kaldialign
|
||||
Successfully installed absl-py-0.13.0 cachetools-4.2.2 certifi-2021.5.30 charset-normalizer-2.0.4 google-auth-1.35.0 google-auth-oaut
|
||||
hlib-0.4.5 grpcio-1.39.0 idna-3.2 kaldialign-0.2 kaldilm-1.8 markdown-3.3.4 oauthlib-3.1.1 protobuf-3.17.3 pyasn1-0.4.8 pyasn1-module
|
||||
s-0.2.8 requests-2.26.0 requests-oauthlib-1.3.0 rsa-4.7.2 sentencepiece-0.1.96 six-1.16.0 tensorboard-2.6.0 tensorboard-data-server-0
|
||||
.6.1 tensorboard-plugin-wit-1.8.0 urllib3-1.26.6 werkzeug-2.0.1
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
<section id="test-your-installation">
|
||||
<h2>Test Your Installation<a class="headerlink" href="#test-your-installation" title="Permalink to this heading"></a></h2>
|
||||
<p>To test that your installation is successful, let us run
|
||||
the <a class="reference external" href="https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR">yesno recipe</a>
|
||||
on CPU.</p>
|
||||
<section id="data-preparation">
|
||||
<h3>Data preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">export</span> <span class="nv">PYTHONPATH</span><span class="o">=</span>/tmp/icefall:<span class="nv">$PYTHONPATH</span>
|
||||
$ <span class="nb">cd</span> /tmp/icefall
|
||||
$ <span class="nb">cd</span> egs/yesno/ASR
|
||||
$ ./prepare.sh
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The log of running <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> is:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>2021-08-23 19:27:26 (prepare.sh:24:main) dl_dir: /tmp/icefall/egs/yesno/ASR/download
|
||||
2021-08-23 19:27:26 (prepare.sh:27:main) stage 0: Download data
|
||||
Downloading waves_yesno.tar.gz: 4.49MB [00:03, 1.39MB/s]
|
||||
2021-08-23 19:27:30 (prepare.sh:36:main) Stage 1: Prepare yesno manifest
|
||||
2021-08-23 19:27:31 (prepare.sh:42:main) Stage 2: Compute fbank for yesno
|
||||
2021-08-23 19:27:32,803 INFO [compute_fbank_yesno.py:52] Processing train
|
||||
Extracting and storing features: 100%|_______________________________________________________________| 90/90 [00:01<00:00, 80.57it/s]
|
||||
2021-08-23 19:27:34,085 INFO [compute_fbank_yesno.py:52] Processing test
|
||||
Extracting and storing features: 100%|______________________________________________________________| 30/30 [00:00<00:00, 248.21it/s]
|
||||
2021-08-23 19:27:34 (prepare.sh:48:main) Stage 3: Prepare lang
|
||||
2021-08-23 19:27:35 (prepare.sh:63:main) Stage 4: Prepare G
|
||||
/tmp/pip-install-fcordre9/kaldilm_6899d26f2d684ad48f21025950cd2866/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Rea
|
||||
d(std::istream&):79
|
||||
[I] Reading \data\ section.
|
||||
/tmp/pip-install-fcordre9/kaldilm_6899d26f2d684ad48f21025950cd2866/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Rea
|
||||
d(std::istream&):140
|
||||
[I] Reading \1-grams: section.
|
||||
2021-08-23 19:27:35 (prepare.sh:89:main) Stage 5: Compile HLG
|
||||
2021-08-23 19:27:35,928 INFO [compile_hlg.py:120] Processing data/lang_phone
|
||||
2021-08-23 19:27:35,929 INFO [lexicon.py:116] Converting L.pt to Linv.pt
|
||||
2021-08-23 19:27:35,931 INFO [compile_hlg.py:48] Building ctc_topo. max_token_id: 3
|
||||
2021-08-23 19:27:35,932 INFO [compile_hlg.py:52] Loading G.fst.txt
|
||||
2021-08-23 19:27:35,932 INFO [compile_hlg.py:62] Intersecting L and G
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:64] LG shape: (4, None)
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:66] Connecting LG
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:68] LG shape after k2.connect: (4, None)
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:70] <class 'torch.Tensor'>
|
||||
2021-08-23 19:27:35,933 INFO [compile_hlg.py:71] Determinizing LG
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:74] <class '_k2.RaggedInt'>
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:76] Connecting LG after k2.determinize
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:79] Removing disambiguation symbols on LG
|
||||
2021-08-23 19:27:35,934 INFO [compile_hlg.py:87] LG shape after k2.remove_epsilon: (6, None)
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:92] Arc sorting LG
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:95] Composing H and LG
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:102] Connecting LG
|
||||
2021-08-23 19:27:35,935 INFO [compile_hlg.py:105] Arc sorting LG
|
||||
2021-08-23 19:27:35,936 INFO [compile_hlg.py:107] HLG.shape: (8, None)
|
||||
2021-08-23 19:27:35,936 INFO [compile_hlg.py:123] Saving HLG.pt to data/lang_phone
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="training">
|
||||
<h3>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h3>
|
||||
<p>Now let us run the training part:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ export CUDA_VISIBLE_DEVICES=""
|
||||
$ ./tdnn/train.py
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>We use <code class="docutils literal notranslate"><span class="pre">export</span> <span class="pre">CUDA_VISIBLE_DEVICES=""</span></code> so that <code class="docutils literal notranslate"><span class="pre">icefall</span></code> uses CPU
|
||||
even if there are GPUs available.</p>
|
||||
</div>
|
||||
<p>The training log is given below:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>2021-08-23 19:30:31,072 INFO [train.py:465] Training started
|
||||
2021-08-23 19:30:31,072 INFO [train.py:466] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01,
|
||||
'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, '
|
||||
best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_doub
|
||||
le_scores': True, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 15, 'feature_dir': PosixPath('data/fbank'
|
||||
), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0
|
||||
, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
|
||||
2021-08-23 19:30:31,074 INFO [lexicon.py:113] Loading pre-compiled data/lang_phone/Linv.pt
|
||||
2021-08-23 19:30:31,098 INFO [asr_datamodule.py:146] About to get train cuts
|
||||
2021-08-23 19:30:31,098 INFO [asr_datamodule.py:240] About to get train cuts
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:149] About to create train dataset
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:200] Using SingleCutSampler.
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:206] About to create train dataloader
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:219] About to get test cuts
|
||||
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:246] About to get test cuts
|
||||
2021-08-23 19:30:31,357 INFO [train.py:416] Epoch 0, batch 0, batch avg loss 1.0789, total avg loss: 1.0789, batch size: 4
|
||||
2021-08-23 19:30:31,848 INFO [train.py:416] Epoch 0, batch 10, batch avg loss 0.5356, total avg loss: 0.7556, batch size: 4
|
||||
2021-08-23 19:30:32,301 INFO [train.py:432] Epoch 0, valid loss 0.9972, best valid loss: 0.9972 best valid epoch: 0
|
||||
2021-08-23 19:30:32,805 INFO [train.py:416] Epoch 0, batch 20, batch avg loss 0.2436, total avg loss: 0.5717, batch size: 3
|
||||
2021-08-23 19:30:33,109 INFO [train.py:432] Epoch 0, valid loss 0.4167, best valid loss: 0.4167 best valid epoch: 0
|
||||
2021-08-23 19:30:33,121 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-0.pt
|
||||
2021-08-23 19:30:33,325 INFO [train.py:416] Epoch 1, batch 0, batch avg loss 0.2214, total avg loss: 0.2214, batch size: 5
|
||||
2021-08-23 19:30:33,798 INFO [train.py:416] Epoch 1, batch 10, batch avg loss 0.0781, total avg loss: 0.1343, batch size: 5
|
||||
2021-08-23 19:30:34,065 INFO [train.py:432] Epoch 1, valid loss 0.0859, best valid loss: 0.0859 best valid epoch: 1
|
||||
2021-08-23 19:30:34,556 INFO [train.py:416] Epoch 1, batch 20, batch avg loss 0.0421, total avg loss: 0.0975, batch size: 3
|
||||
2021-08-23 19:30:34,810 INFO [train.py:432] Epoch 1, valid loss 0.0431, best valid loss: 0.0431 best valid epoch: 1
|
||||
2021-08-23 19:30:34,824 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-1.pt
|
||||
|
||||
... ...
|
||||
|
||||
2021-08-23 19:30:49,657 INFO [train.py:416] Epoch 13, batch 0, batch avg loss 0.0109, total avg loss: 0.0109, batch size: 5
|
||||
2021-08-23 19:30:49,984 INFO [train.py:416] Epoch 13, batch 10, batch avg loss 0.0093, total avg loss: 0.0096, batch size: 4
|
||||
2021-08-23 19:30:50,239 INFO [train.py:432] Epoch 13, valid loss 0.0104, best valid loss: 0.0101 best valid epoch: 12
|
||||
2021-08-23 19:30:50,569 INFO [train.py:416] Epoch 13, batch 20, batch avg loss 0.0092, total avg loss: 0.0096, batch size: 2
|
||||
2021-08-23 19:30:50,819 INFO [train.py:432] Epoch 13, valid loss 0.0101, best valid loss: 0.0101 best valid epoch: 13
|
||||
2021-08-23 19:30:50,835 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-13.pt
|
||||
2021-08-23 19:30:51,024 INFO [train.py:416] Epoch 14, batch 0, batch avg loss 0.0105, total avg loss: 0.0105, batch size: 5
|
||||
2021-08-23 19:30:51,317 INFO [train.py:416] Epoch 14, batch 10, batch avg loss 0.0099, total avg loss: 0.0097, batch size: 4
|
||||
2021-08-23 19:30:51,552 INFO [train.py:432] Epoch 14, valid loss 0.0108, best valid loss: 0.0101 best valid epoch: 13
|
||||
2021-08-23 19:30:51,869 INFO [train.py:416] Epoch 14, batch 20, batch avg loss 0.0096, total avg loss: 0.0097, batch size: 5
|
||||
2021-08-23 19:30:52,107 INFO [train.py:432] Epoch 14, valid loss 0.0102, best valid loss: 0.0101 best valid epoch: 13
|
||||
2021-08-23 19:30:52,126 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-14.pt
|
||||
2021-08-23 19:30:52,128 INFO [train.py:537] Done!
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="decoding">
|
||||
<h3>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h3>
|
||||
<p>Let us use the trained model to decode the test set:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ ./tdnn/decode.py
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The decoding log is:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>2021-08-23 19:35:30,192 INFO [decode.py:249] Decoding started
|
||||
2021-08-23 19:35:30,192 INFO [decode.py:250] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
|
||||
2021-08-23 19:35:30,193 INFO [lexicon.py:113] Loading pre-compiled data/lang_phone/Linv.pt
|
||||
2021-08-23 19:35:30,213 INFO [decode.py:259] device: cpu
|
||||
2021-08-23 19:35:30,217 INFO [decode.py:279] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt']
|
||||
/tmp/icefall/icefall/checkpoint.py:146: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch.
|
||||
It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
|
||||
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at /pytorch/aten/src/ATen/native/BinaryOps.cpp:450.)
|
||||
avg[k] //= n
|
||||
2021-08-23 19:35:30,220 INFO [asr_datamodule.py:219] About to get test cuts
|
||||
2021-08-23 19:35:30,220 INFO [asr_datamodule.py:246] About to get test cuts
|
||||
2021-08-23 19:35:30,409 INFO [decode.py:190] batch 0/8, cuts processed until now is 4
|
||||
2021-08-23 19:35:30,571 INFO [decode.py:228] The transcripts are stored in tdnn/exp/recogs-test_set.txt
|
||||
2021-08-23 19:35:30,572 INFO [utils.py:317] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
|
||||
2021-08-23 19:35:30,573 INFO [decode.py:236] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
|
||||
2021-08-23 19:35:30,573 INFO [decode.py:299] Done!
|
||||
</pre></div>
|
||||
</div>
|
||||
<p><strong>Congratulations!</strong> You have successfully setup the environment and have run the first recipe in <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||||
<p>Have fun with <code class="docutils literal notranslate"><span class="pre">icefall</span></code>!</p>
|
||||
</section>
|
||||
</section>
|
||||
<section id="youtube-video">
|
||||
<h2>YouTube Video<a class="headerlink" href="#youtube-video" title="Permalink to this heading"></a></h2>
|
||||
<p>We provide the following YouTube video showing how to install <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.
|
||||
It also shows how to debug various problems that you may encounter while
|
||||
using <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>To get the latest news of <a class="reference external" href="https://github.com/k2-fsa">next-gen Kaldi</a>, please subscribe
|
||||
the following YouTube channel by <a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">Nadira Povey</a>:</p>
|
||||
<blockquote>
|
||||
<div><p><a class="reference external" href="https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw">https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw</a></p>
|
||||
</div></blockquote>
|
||||
</div>
|
||||
<div class="video_wrapper" style="">
|
||||
<iframe allowfullscreen="true" src="https://www.youtube.com/embed/LVmrBD0tLfE" style="border: 0; height: 345px; width: 560px">
|
||||
</iframe></div></section>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="../index.html" class="btn btn-neutral float-left" title="Icefall" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="../recipes/index.html" class="btn btn-neutral float-right" title="Recipes" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
BIN
objects.inv
Normal file
816
recipes/aishell/conformer_ctc.html
Normal file
136
recipes/aishell/index.html
Normal file
@ -0,0 +1,136 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>aishell — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
<link rel="next" title="TDNN-LSTM CTC" href="tdnn_lstm_ctc.html" />
|
||||
<link rel="prev" title="Recipes" href="../index.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">Recipes</a><ul class="current">
|
||||
<li class="toctree-l2 current"><a class="current reference internal" href="#">aishell</a><ul>
|
||||
<li class="toctree-l3"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM CTC</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="stateless_transducer.html">Stateless Transducer</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../librispeech/index.html">LibriSpeech</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../yesno/index.html">YesNo</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="../index.html">Recipes</a> »</li>
|
||||
<li>aishell</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/recipes/aishell/index.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="aishell">
|
||||
<h1>aishell<a class="headerlink" href="#aishell" title="Permalink to this heading"></a></h1>
|
||||
<p>Aishell is an open-source Chinese Mandarin speech corpus published by Beijing
|
||||
Shell Shell Technology Co.,Ltd.</p>
|
||||
<p>400 people from different accent areas in China are invited to participate in
|
||||
the recording, which is conducted in a quiet indoor environment using high
|
||||
fidelity microphone and downsampled to 16kHz. The manual transcription accuracy
|
||||
is above 95%, through professional speech annotation and strict quality
|
||||
inspection. The data is free for academic use. We hope to provide moderate
|
||||
amount of data for new researchers in the field of speech recognition.</p>
|
||||
<p>It can be downloaded from <a class="reference external" href="https://www.openslr.org/33/">https://www.openslr.org/33/</a></p>
|
||||
<div class="toctree-wrapper compound">
|
||||
<ul>
|
||||
<li class="toctree-l1"><a class="reference internal" href="tdnn_lstm_ctc.html">TDNN-LSTM CTC</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="stateless_transducer.html">Stateless Transducer</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="../index.html" class="btn btn-neutral float-left" title="Recipes" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="tdnn_lstm_ctc.html" class="btn btn-neutral float-right" title="TDNN-LSTM CTC" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|
811
recipes/aishell/stateless_transducer.html
Normal file
607
recipes/aishell/tdnn_lstm_ctc.html
Normal file
@ -0,0 +1,607 @@
|
||||
<!DOCTYPE html>
|
||||
<html class="writer-html5" lang="en" >
|
||||
<head>
|
||||
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>TDNN-LSTM CTC — icefall 0.1 documentation</title>
|
||||
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
|
||||
<link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
|
||||
<!--[if lt IE 9]>
|
||||
<script src="../../_static/js/html5shiv.min.js"></script>
|
||||
<![endif]-->
|
||||
|
||||
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
|
||||
<script src="../../_static/jquery.js"></script>
|
||||
<script src="../../_static/underscore.js"></script>
|
||||
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
|
||||
<script src="../../_static/doctools.js"></script>
|
||||
<script src="../../_static/js/theme.js"></script>
|
||||
<link rel="index" title="Index" href="../../genindex.html" />
|
||||
<link rel="search" title="Search" href="../../search.html" />
|
||||
<link rel="next" title="Conformer CTC" href="conformer_ctc.html" />
|
||||
<link rel="prev" title="aishell" href="index.html" />
|
||||
</head>
|
||||
|
||||
<body class="wy-body-for-nav">
|
||||
<div class="wy-grid-for-nav">
|
||||
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
||||
<div class="wy-side-scroll">
|
||||
<div class="wy-side-nav-search" >
|
||||
<a href="../../index.html" class="icon icon-home"> icefall
|
||||
</a>
|
||||
<div role="search">
|
||||
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
||||
<input type="text" name="q" placeholder="Search docs" />
|
||||
<input type="hidden" name="check_keywords" value="yes" />
|
||||
<input type="hidden" name="area" value="default" />
|
||||
</form>
|
||||
</div>
|
||||
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
||||
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
|
||||
<ul class="current">
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../installation/index.html">Installation</a></li>
|
||||
<li class="toctree-l1 current"><a class="reference internal" href="../index.html">Recipes</a><ul class="current">
|
||||
<li class="toctree-l2 current"><a class="reference internal" href="index.html">aishell</a><ul class="current">
|
||||
<li class="toctree-l3 current"><a class="current reference internal" href="#">TDNN-LSTM CTC</a><ul>
|
||||
<li class="toctree-l4"><a class="reference internal" href="#data-preparation">Data preparation</a></li>
|
||||
<li class="toctree-l4"><a class="reference internal" href="#training">Training</a></li>
|
||||
<li class="toctree-l4"><a class="reference internal" href="#decoding">Decoding</a></li>
|
||||
<li class="toctree-l4"><a class="reference internal" href="#pre-trained-model">Pre-trained Model</a></li>
|
||||
<li class="toctree-l4"><a class="reference internal" href="#colab-notebook">Colab notebook</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="conformer_ctc.html">Conformer CTC</a></li>
|
||||
<li class="toctree-l3"><a class="reference internal" href="stateless_transducer.html">Stateless Transducer</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../librispeech/index.html">LibriSpeech</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../timit/index.html">TIMIT</a></li>
|
||||
<li class="toctree-l2"><a class="reference internal" href="../yesno/index.html">YesNo</a></li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../contributing/index.html">Contributing</a></li>
|
||||
<li class="toctree-l1"><a class="reference internal" href="../../huggingface/index.html">Huggingface</a></li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
||||
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
||||
<a href="../../index.html">icefall</a>
|
||||
</nav>
|
||||
|
||||
<div class="wy-nav-content">
|
||||
<div class="rst-content">
|
||||
<div role="navigation" aria-label="Page navigation">
|
||||
<ul class="wy-breadcrumbs">
|
||||
<li><a href="../../index.html" class="icon icon-home"></a> »</li>
|
||||
<li><a href="../index.html">Recipes</a> »</li>
|
||||
<li><a href="index.html">aishell</a> »</li>
|
||||
<li>TDNN-LSTM CTC</li>
|
||||
<li class="wy-breadcrumbs-aside">
|
||||
<a href="https://github.com/k2-fsa/icefall/blob/master/icefall/docs/source/recipes/aishell/tdnn_lstm_ctc.rst" class="fa fa-github"> Edit on GitHub</a>
|
||||
</li>
|
||||
</ul>
|
||||
<hr/>
|
||||
</div>
|
||||
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
||||
<div itemprop="articleBody">
|
||||
|
||||
<section id="tdnn-lstm-ctc">
|
||||
<h1>TDNN-LSTM CTC<a class="headerlink" href="#tdnn-lstm-ctc" title="Permalink to this heading"></a></h1>
|
||||
<p>This tutorial shows you how to run a tdnn-lstm ctc model
|
||||
with the <a class="reference external" href="https://www.openslr.org/33">Aishell</a> dataset.</p>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>We assume you have read the page <a class="reference internal" href="../../installation/index.html#install-icefall"><span class="std std-ref">Installation</span></a> and have setup
|
||||
the environment for <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||||
</div>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>We recommend you to use a GPU or several GPUs to run this recipe.</p>
|
||||
</div>
|
||||
<p>In this tutorial, you will learn:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><ol class="arabic simple">
|
||||
<li><p>How to prepare data for training and decoding</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="arabic simple" start="2">
|
||||
<li><p>How to start the training, either with a single GPU or multiple GPUs</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="arabic simple" start="3">
|
||||
<li><p>How to do decoding after training.</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="arabic simple" start="4">
|
||||
<li><p>How to use a pre-trained model, provided by us</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<section id="data-preparation">
|
||||
<h2>Data preparation<a class="headerlink" href="#data-preparation" title="Permalink to this heading"></a></h2>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./prepare.sh
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The script <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> handles the data preparation for you, <strong>automagically</strong>.
|
||||
All you need to do is to run it.</p>
|
||||
<p>The data preparation contains several stages, you can use the following two
|
||||
options:</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--stage</span></code></p></li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--stop-stage</span></code></p></li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>to control which stage(s) should be run. By default, all stages are executed.</p>
|
||||
<p>For example,</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./prepare.sh --stage <span class="m">0</span> --stop-stage <span class="m">0</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>means to run only stage 0.</p>
|
||||
<p>To run stage 2 to stage 5, use:</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ ./prepare.sh --stage <span class="m">2</span> --stop-stage <span class="m">5</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>If you have pre-downloaded the <a class="reference external" href="https://www.openslr.org/33">Aishell</a>
|
||||
dataset and the <a class="reference external" href="http://www.openslr.org/17/">musan</a> dataset, say,
|
||||
they are saved in <code class="docutils literal notranslate"><span class="pre">/tmp/aishell</span></code> and <code class="docutils literal notranslate"><span class="pre">/tmp/musan</span></code>, you can modify
|
||||
the <code class="docutils literal notranslate"><span class="pre">dl_dir</span></code> variable in <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> to point to <code class="docutils literal notranslate"><span class="pre">/tmp</span></code> so that
|
||||
<code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code> won’t re-download them.</p>
|
||||
</div>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>A 3-gram language model will be downloaded from huggingface, we assume you have
|
||||
installed and initialized <code class="docutils literal notranslate"><span class="pre">git-lfs</span></code>. If not, you could install <code class="docutils literal notranslate"><span class="pre">git-lfs</span></code> by</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ sudo apt-get install git-lfs
|
||||
$ git-lfs install
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>If you don’t have the <code class="docutils literal notranslate"><span class="pre">sudo</span></code> permission, you could download the
|
||||
<a class="reference external" href="https://github.com/git-lfs/git-lfs/releases">git-lfs binary</a> here, then add it to your <code class="docutils literal notranslate"><span class="pre">PATH</span></code>.</p>
|
||||
</div>
|
||||
<div class="admonition note">
|
||||
<p class="admonition-title">Note</p>
|
||||
<p>All generated files by <code class="docutils literal notranslate"><span class="pre">./prepare.sh</span></code>, e.g., features, lexicon, etc,
|
||||
are saved in <code class="docutils literal notranslate"><span class="pre">./data</span></code> directory.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="training">
|
||||
<h2>Training<a class="headerlink" href="#training" title="Permalink to this heading"></a></h2>
|
||||
<section id="configurable-options">
|
||||
<h3>Configurable options<a class="headerlink" href="#configurable-options" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/train.py --help
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>shows you the training options that can be passed from the commandline.
|
||||
The following options are used quite often:</p>
|
||||
<blockquote>
|
||||
<div><ul>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--num-epochs</span></code></p>
|
||||
<p>It is the number of epochs to train. For instance,
|
||||
<code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/train.py</span> <span class="pre">--num-epochs</span> <span class="pre">30</span></code> trains for 30 epochs
|
||||
and generates <code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …, <code class="docutils literal notranslate"><span class="pre">epoch-29.pt</span></code>
|
||||
in the folder <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/exp</span></code>.</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--start-epoch</span></code></p>
|
||||
<p>It’s used to resume training.
|
||||
<code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/train.py</span> <span class="pre">--start-epoch</span> <span class="pre">10</span></code> loads the
|
||||
checkpoint <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/exp/epoch-9.pt</span></code> and starts
|
||||
training from epoch 10, based on the state from epoch 9.</p>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--world-size</span></code></p>
|
||||
<p>It is used for multi-GPU single-machine DDP training.</p>
|
||||
<blockquote>
|
||||
<div><ul class="simple">
|
||||
<li><ol class="loweralpha simple">
|
||||
<li><p>If it is 1, then no DDP training is used.</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
<li><ol class="loweralpha simple" start="2">
|
||||
<li><p>If it is 2, then GPU 0 and GPU 1 are used for DDP training.</p></li>
|
||||
</ol>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>The following shows some use cases with it.</p>
|
||||
<blockquote>
|
||||
<div><p><strong>Use case 1</strong>: You have 4 GPUs, but you only want to use GPU 0 and
|
||||
GPU 2 for training. You can do the following:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ <span class="nb">export</span> <span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0,2"</span>
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size <span class="m">2</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p><strong>Use case 2</strong>: You have 4 GPUs and you want to use all of them
|
||||
for training. You can do the following:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size <span class="m">4</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p><strong>Use case 3</strong>: You have 4 GPUs but you only want to use GPU 3
|
||||
for training. You can do the following:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ <span class="nb">export</span> <span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"3"</span>
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size <span class="m">1</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
</div></blockquote>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>Only multi-GPU single-machine DDP training is implemented at present.
|
||||
Multi-GPU multi-machine DDP training will be added later.</p>
|
||||
</div>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
|
||||
<p>It specifies the number of seconds over all utterances in a
|
||||
batch, before <strong>padding</strong>.
|
||||
If you encounter CUDA OOM, please reduce it. For instance, if
|
||||
you are using a V100 NVIDIA GPU, we recommend you to set it to <code class="docutils literal notranslate"><span class="pre">2000</span></code>.</p>
|
||||
<div class="admonition hint">
|
||||
<p class="admonition-title">Hint</p>
|
||||
<p>Due to padding, the number of seconds of all utterances in a
|
||||
batch will usually be larger than <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code>.</p>
|
||||
<p>A larger value for <code class="docutils literal notranslate"><span class="pre">--max-duration</span></code> may cause OOM during training,
|
||||
while a smaller value may increase the training time. You have to
|
||||
tune it.</p>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="pre-configured-options">
|
||||
<h3>Pre-configured options<a class="headerlink" href="#pre-configured-options" title="Permalink to this heading"></a></h3>
|
||||
<p>There are some training options, e.g., weight decay,
|
||||
number of warmup steps, results dir, etc,
|
||||
that are not passed from the commandline.
|
||||
They are pre-configured by the function <code class="docutils literal notranslate"><span class="pre">get_params()</span></code> in
|
||||
<a class="reference external" href="https://github.com/k2-fsa/icefall/blob/master/egs/aishell/ASR/tdnn_lstm_ctc/train.py">tdnn_lstm_ctc/train.py</a></p>
|
||||
<p>You don’t need to change these pre-configured parameters. If you really need to change
|
||||
them, please modify <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/train.py</span></code> directly.</p>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>The training set is perturbed by speed with two factors: 0.9 and 1.1.
|
||||
Each epoch actually processes <code class="docutils literal notranslate"><span class="pre">3x150</span> <span class="pre">==</span> <span class="pre">450</span></code> hours of data.</p>
|
||||
</div>
|
||||
</section>
|
||||
<section id="training-logs">
|
||||
<h3>Training logs<a class="headerlink" href="#training-logs" title="Permalink to this heading"></a></h3>
|
||||
<p>Training logs and checkpoints are saved in <code class="docutils literal notranslate"><span class="pre">tdnn_lstm_ctc/exp</span></code>.
|
||||
You will find the following files in that directory:</p>
|
||||
<blockquote>
|
||||
<div><ul>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">epoch-0.pt</span></code>, <code class="docutils literal notranslate"><span class="pre">epoch-1.pt</span></code>, …</p>
|
||||
<p>These are checkpoint files, containing model <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> and optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code>.
|
||||
To resume training from some checkpoint, say <code class="docutils literal notranslate"><span class="pre">epoch-10.pt</span></code>, you can use:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ ./tdnn_lstm_ctc/train.py --start-epoch <span class="m">11</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">tensorboard/</span></code></p>
|
||||
<p>This folder contains TensorBoard logs. Training loss, validation loss, learning
|
||||
rate, etc, are recorded in these logs. You can visualize them by:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> tdnn_lstm_ctc/exp/tensorboard
|
||||
$ tensorboard dev upload --logdir . --description <span class="s2">"TDNN-LSTM CTC training for Aishell with icefall"</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p>It will print something like below:</p>
|
||||
<blockquote>
|
||||
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">TensorFlow</span> <span class="n">installation</span> <span class="ow">not</span> <span class="n">found</span> <span class="o">-</span> <span class="n">running</span> <span class="k">with</span> <span class="n">reduced</span> <span class="n">feature</span> <span class="nb">set</span><span class="o">.</span>
|
||||
<span class="n">Upload</span> <span class="n">started</span> <span class="ow">and</span> <span class="n">will</span> <span class="k">continue</span> <span class="n">reading</span> <span class="nb">any</span> <span class="n">new</span> <span class="n">data</span> <span class="k">as</span> <span class="n">it</span><span class="s1">'s added to the logdir.</span>
|
||||
|
||||
<span class="n">To</span> <span class="n">stop</span> <span class="n">uploading</span><span class="p">,</span> <span class="n">press</span> <span class="n">Ctrl</span><span class="o">-</span><span class="n">C</span><span class="o">.</span>
|
||||
|
||||
<span class="n">New</span> <span class="n">experiment</span> <span class="n">created</span><span class="o">.</span> <span class="n">View</span> <span class="n">your</span> <span class="n">TensorBoard</span> <span class="n">at</span><span class="p">:</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">tensorboard</span><span class="o">.</span><span class="n">dev</span><span class="o">/</span><span class="n">experiment</span><span class="o">/</span><span class="n">LJI9MWUORLOw3jkdhxwk8A</span><span class="o">/</span>
|
||||
|
||||
<span class="p">[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span><span class="n">T11</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mi">23</span><span class="p">]</span> <span class="n">Started</span> <span class="n">scanning</span> <span class="n">logdir</span><span class="o">.</span>
|
||||
<span class="p">[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span><span class="n">T11</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mi">24</span><span class="p">]</span> <span class="n">Total</span> <span class="n">uploaded</span><span class="p">:</span> <span class="mi">4454</span> <span class="n">scalars</span><span class="p">,</span> <span class="mi">0</span> <span class="n">tensors</span><span class="p">,</span> <span class="mi">0</span> <span class="n">binary</span> <span class="n">objects</span>
|
||||
<span class="n">Listening</span> <span class="k">for</span> <span class="n">new</span> <span class="n">data</span> <span class="ow">in</span> <span class="n">logdir</span><span class="o">...</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</div></blockquote>
|
||||
<p>Note there is a URL in the above output, click it and you will see
|
||||
the following screenshot:</p>
|
||||
<blockquote>
|
||||
<div><figure class="align-center" id="id2">
|
||||
<a class="reference external image-reference" href="https://tensorboard.dev/experiment/LJI9MWUORLOw3jkdhxwk8A/"><img alt="TensorBoard screenshot" src="../../_images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg" style="width: 600px;" /></a>
|
||||
<figcaption>
|
||||
<p><span class="caption-number">Fig. 1 </span><span class="caption-text">TensorBoard screenshot.</span><a class="headerlink" href="#id2" title="Permalink to this image"></a></p>
|
||||
</figcaption>
|
||||
</figure>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">log/log-train-xxxx</span></code></p>
|
||||
<p>It is the detailed training log in text format, same as the one
|
||||
you saw printed to the console during training.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="usage-examples">
|
||||
<h3>Usage examples<a class="headerlink" href="#usage-examples" title="Permalink to this heading"></a></h3>
|
||||
<p>The following shows typical use cases:</p>
|
||||
<section id="case-1">
|
||||
<h4><strong>Case 1</strong><a class="headerlink" href="#case-1" title="Permalink to this heading"></a></h4>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ <span class="nb">export</span> <span class="nv">CUDA_VISIBLE_DEVICES</span><span class="o">=</span><span class="s2">"0,3"</span>
|
||||
$ ./tdnn_lstm_ctc/train.py --world-size <span class="m">2</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>It uses GPU 0 and GPU 3 for DDP training.</p>
|
||||
</section>
|
||||
<section id="case-2">
|
||||
<h4><strong>Case 2</strong><a class="headerlink" href="#case-2" title="Permalink to this heading"></a></h4>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/train.py --num-epochs <span class="m">10</span> --start-epoch <span class="m">3</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>It loads checkpoint <code class="docutils literal notranslate"><span class="pre">./tdnn_lstm_ctc/exp/epoch-2.pt</span></code> and starts
|
||||
training from epoch 3. Also, it trains for 10 epochs.</p>
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
<section id="decoding">
|
||||
<h2>Decoding<a class="headerlink" href="#decoding" title="Permalink to this heading"></a></h2>
|
||||
<p>The decoding part uses checkpoints saved by the training part, so you have
|
||||
to run the training part first.</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/decode.py --help
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>shows the options for decoding.</p>
|
||||
<p>The commonly used options are:</p>
|
||||
<blockquote>
|
||||
<div><ul>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--method</span></code></p>
|
||||
<p>This specifies the decoding method.</p>
|
||||
<p>The following command uses <code class="docutils literal notranslate"><span class="pre">1best</span></code> decoding, i.e., extracting the best path from the decoding lattice:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/decode.py --method 1best --max-duration 100
|
||||
</pre></div>
|
||||
</div>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">--max-duration</span></code></p>
|
||||
<p>It has the same meaning as the one during training. A larger
|
||||
value may cause OOM.</p>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
</section>
|
||||
<section id="pre-trained-model">
|
||||
<h2>Pre-trained Model<a class="headerlink" href="#pre-trained-model" title="Permalink to this heading"></a></h2>
|
||||
<p>We have uploaded a pre-trained model to
|
||||
<a class="reference external" href="https://huggingface.co/pkufool/icefall_asr_aishell_tdnn_lstm_ctc">https://huggingface.co/pkufool/icefall_asr_aishell_tdnn_lstm_ctc</a>.</p>
|
||||
<p>We describe how to use the pre-trained model to transcribe a sound file or
|
||||
multiple sound files in the following.</p>
|
||||
<section id="install-kaldifeat">
|
||||
<h3>Install kaldifeat<a class="headerlink" href="#install-kaldifeat" title="Permalink to this heading"></a></h3>
|
||||
<p><a class="reference external" href="https://github.com/csukuangfj/kaldifeat">kaldifeat</a> is used to
|
||||
extract features for a single sound file or multiple sound files
|
||||
at the same time.</p>
|
||||
<p>Please refer to <a class="reference external" href="https://github.com/csukuangfj/kaldifeat">https://github.com/csukuangfj/kaldifeat</a> for installation.</p>
|
||||
</section>
|
||||
<section id="download-the-pre-trained-model">
|
||||
<h3>Download the pre-trained model<a class="headerlink" href="#download-the-pre-trained-model" title="Permalink to this heading"></a></h3>
|
||||
<p>The following commands describe how to download the pre-trained model:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/aishell/ASR
|
||||
$ mkdir tmp
|
||||
$ cd tmp
|
||||
$ git lfs install
|
||||
$ git clone https://huggingface.co/pkufool/icefall_asr_aishell_tdnn_lstm_ctc
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>You have to use <code class="docutils literal notranslate"><span class="pre">git</span> <span class="pre">lfs</span></code> to download the pre-trained model.</p>
|
||||
</div>
|
||||
<div class="admonition caution">
|
||||
<p class="admonition-title">Caution</p>
|
||||
<p>In order to use this pre-trained model, your k2 version has to be v1.7 or later.</p>
|
||||
</div>
|
||||
<p>After downloading, you will have the following files:</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ tree tmp
|
||||
</pre></div>
|
||||
</div>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>tmp/
|
||||
<span class="sb">`</span>-- icefall_asr_aishell_tdnn_lstm_ctc
|
||||
<span class="p">|</span>-- README.md
|
||||
<span class="p">|</span>-- data
|
||||
<span class="p">|</span> <span class="sb">`</span>-- lang_phone
|
||||
<span class="p">|</span> <span class="p">|</span>-- HLG.pt
|
||||
<span class="p">|</span> <span class="p">|</span>-- tokens.txt
|
||||
<span class="p">|</span> <span class="sb">`</span>-- words.txt
|
||||
<span class="p">|</span>-- exp
|
||||
<span class="p">|</span> <span class="sb">`</span>-- pretrained.pt
|
||||
<span class="sb">`</span>-- test_waves
|
||||
<span class="p">|</span>-- BAC009S0764W0121.wav
|
||||
<span class="p">|</span>-- BAC009S0764W0122.wav
|
||||
<span class="p">|</span>-- BAC009S0764W0123.wav
|
||||
<span class="sb">`</span>-- trans.txt
|
||||
|
||||
<span class="m">5</span> directories, <span class="m">9</span> files
|
||||
</pre></div>
|
||||
</div>
|
||||
<p><strong>File descriptions</strong>:</p>
|
||||
<blockquote>
|
||||
<div><ul>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_phone/HLG.pt</span></code></p>
|
||||
<blockquote>
|
||||
<div><p>It is the decoding graph.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_phone/tokens.txt</span></code></p>
|
||||
<blockquote>
|
||||
<div><p>It contains tokens and their IDs.
|
||||
Provided only for convenience so that you can look up the SOS/EOS ID easily.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">data/lang_phone/words.txt</span></code></p>
|
||||
<blockquote>
|
||||
<div><p>It contains words and their IDs.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">exp/pretrained.pt</span></code></p>
|
||||
<blockquote>
|
||||
<div><p>It contains pre-trained model parameters, obtained by averaging
|
||||
checkpoints from <code class="docutils literal notranslate"><span class="pre">epoch-18.pt</span></code> to <code class="docutils literal notranslate"><span class="pre">epoch-40.pt</span></code>.
|
||||
Note: We have removed optimizer <code class="docutils literal notranslate"><span class="pre">state_dict</span></code> to reduce file size.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">test_waves/*.wav</span></code></p>
|
||||
<blockquote>
|
||||
<div><p>It contains some test sound files from Aishell <code class="docutils literal notranslate"><span class="pre">test</span></code> dataset.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
<li><p><code class="docutils literal notranslate"><span class="pre">test_waves/trans.txt</span></code></p>
|
||||
<blockquote>
|
||||
<div><p>It contains the reference transcripts for the sound files in <cite>test_waves/</cite>.</p>
|
||||
</div></blockquote>
|
||||
</li>
|
||||
</ul>
|
||||
</div></blockquote>
|
||||
<p>The information of the test sound files is listed below:</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ soxi tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/*.wav
|
||||
|
||||
Input File : <span class="s1">'tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav'</span>
|
||||
Channels : <span class="m">1</span>
|
||||
Sample Rate : <span class="m">16000</span>
|
||||
Precision : <span class="m">16</span>-bit
|
||||
Duration : <span class="m">00</span>:00:04.20 <span class="o">=</span> <span class="m">67263</span> samples ~ <span class="m">315</span>.295 CDDA sectors
|
||||
File Size : 135k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: <span class="m">16</span>-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : <span class="s1">'tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav'</span>
|
||||
Channels : <span class="m">1</span>
|
||||
Sample Rate : <span class="m">16000</span>
|
||||
Precision : <span class="m">16</span>-bit
|
||||
Duration : <span class="m">00</span>:00:04.12 <span class="o">=</span> <span class="m">65840</span> samples ~ <span class="m">308</span>.625 CDDA sectors
|
||||
File Size : 132k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: <span class="m">16</span>-bit Signed Integer PCM
|
||||
|
||||
|
||||
Input File : <span class="s1">'tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav'</span>
|
||||
Channels : <span class="m">1</span>
|
||||
Sample Rate : <span class="m">16000</span>
|
||||
Precision : <span class="m">16</span>-bit
|
||||
Duration : <span class="m">00</span>:00:04.00 <span class="o">=</span> <span class="m">64000</span> samples ~ <span class="m">300</span> CDDA sectors
|
||||
File Size : 128k
|
||||
Bit Rate : 256k
|
||||
Sample Encoding: <span class="m">16</span>-bit Signed Integer PCM
|
||||
|
||||
Total Duration of <span class="m">3</span> files: <span class="m">00</span>:00:12.32
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
<section id="usage">
|
||||
<h3>Usage<a class="headerlink" href="#usage" title="Permalink to this heading"></a></h3>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>$ cd egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/pretrained.py --help
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>displays the help information.</p>
|
||||
<section id="hlg-decoding">
|
||||
<h4>HLG decoding<a class="headerlink" href="#hlg-decoding" title="Permalink to this heading"></a></h4>
|
||||
<p>HLG decoding uses the best path of the decoding lattice as the decoding result.</p>
|
||||
<p>The command to run HLG decoding is:</p>
|
||||
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>$ <span class="nb">cd</span> egs/aishell/ASR
|
||||
$ ./tdnn_lstm_ctc/pretrained.py <span class="se">\</span>
|
||||
--checkpoint ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/exp/pretrained.pt <span class="se">\</span>
|
||||
--words-file ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/data/lang_phone/words.txt <span class="se">\</span>
|
||||
--HLG ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/data/lang_phone/HLG.pt <span class="se">\</span>
|
||||
--method 1best <span class="se">\</span>
|
||||
  ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav <span class="se">\</span>
|
||||
  ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav <span class="se">\</span>
|
||||
  ./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The output is given below:</p>
|
||||
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">00</span><span class="p">:</span><span class="mi">55</span><span class="p">,</span><span class="mi">858</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">140</span><span class="p">]</span> <span class="n">device</span><span class="p">:</span> <span class="n">cuda</span><span class="p">:</span><span class="mi">0</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">00</span><span class="p">:</span><span class="mi">55</span><span class="p">,</span><span class="mi">858</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">142</span><span class="p">]</span> <span class="n">Creating</span> <span class="n">model</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">05</span><span class="p">,</span><span class="mi">389</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">154</span><span class="p">]</span> <span class="n">Loading</span> <span class="n">HLG</span> <span class="kn">from</span> <span class="nn">.</span><span class="o">/</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">data</span><span class="o">/</span><span class="n">lang_phone</span><span class="o">/</span><span class="n">HLG</span><span class="o">.</span><span class="n">pt</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">06</span><span class="p">,</span><span class="mi">531</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">161</span><span class="p">]</span> <span class="n">Constructing</span> <span class="n">Fbank</span> <span class="n">computer</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">06</span><span class="p">,</span><span class="mi">536</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">171</span><span class="p">]</span> <span class="n">Reading</span> <span class="n">sound</span> <span class="n">files</span><span class="p">:</span> <span class="p">[</span><span class="s1">'./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0121.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0122.wav'</span><span class="p">,</span> <span class="s1">'./tmp/icefall_asr_aishell_tdnn_lstm_ctc/test_waves/BAC009S0764W0123.wav'</span><span class="p">]</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">06</span><span class="p">,</span><span class="mi">539</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">177</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">started</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">06</span><span class="p">,</span><span class="mi">917</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">207</span><span class="p">]</span> <span class="n">Use</span> <span class="n">HLG</span> <span class="n">decoding</span>
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">07</span><span class="p">,</span><span class="mi">129</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">220</span><span class="p">]</span>
|
||||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0121</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||||
<span class="n">甚至</span> <span class="n">出现</span> <span class="n">交易</span> <span class="n">几乎</span> <span class="n">停滞</span> <span class="n">的</span> <span class="n">情况</span>
|
||||
|
||||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0122</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||||
<span class="n">一二</span> <span class="n">线</span> <span class="n">城市</span> <span class="n">虽然</span> <span class="n">也</span> <span class="n">处于</span> <span class="n">调整</span> <span class="n">中</span>
|
||||
|
||||
<span class="o">./</span><span class="n">tmp</span><span class="o">/</span><span class="n">icefall_asr_aishell_tdnn_lstm_ctc</span><span class="o">/</span><span class="n">test_waves</span><span class="o">/</span><span class="n">BAC009S0764W0123</span><span class="o">.</span><span class="n">wav</span><span class="p">:</span>
|
||||
<span class="n">但</span> <span class="n">因为</span> <span class="n">聚集</span> <span class="n">了</span> <span class="n">过多</span> <span class="n">公共</span> <span class="n">资源</span>
|
||||
|
||||
|
||||
<span class="mi">2021</span><span class="o">-</span><span class="mi">09</span><span class="o">-</span><span class="mi">13</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">07</span><span class="p">,</span><span class="mi">129</span> <span class="n">INFO</span> <span class="p">[</span><span class="n">pretrained</span><span class="o">.</span><span class="n">py</span><span class="p">:</span><span class="mi">222</span><span class="p">]</span> <span class="n">Decoding</span> <span class="n">Done</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
</section>
|
||||
<section id="colab-notebook">
|
||||
<h2>Colab notebook<a class="headerlink" href="#colab-notebook" title="Permalink to this heading"></a></h2>
|
||||
<p>We do provide a colab notebook for this recipe showing how to use a pre-trained model.</p>
|
||||
<p><a class="reference external" href="https://colab.research.google.com/drive/1qULaGvXq7PCu_P61oubfz9b53JzY4H3z"><img alt="aishell asr tdnn lstm ctc colab notebook" src="https://colab.research.google.com/assets/colab-badge.svg" /></a></p>
|
||||
<p><strong>Congratulations!</strong> You have finished the aishell ASR recipe with
|
||||
TDNN-LSTM CTC models in <code class="docutils literal notranslate"><span class="pre">icefall</span></code>.</p>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
|
||||
<a href="index.html" class="btn btn-neutral float-left" title="aishell" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
|
||||
<a href="conformer_ctc.html" class="btn btn-neutral float-right" title="Conformer CTC" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
|
||||
</div>
|
||||
|
||||
<hr/>
|
||||
|
||||
<div role="contentinfo">
|
||||
<p>© Copyright 2021, icefall development team.</p>
|
||||
</div>
|
||||
|
||||
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
||||
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
||||
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
||||
|
||||
|
||||
</footer>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
<script>
|
||||
jQuery(function () {
|
||||
SphinxRtdTheme.Navigation.enable(true);
|
||||
});
|
||||
</script>
|
||||
|
||||
</body>
|
||||
</html>
|