From d1976a19d7750dd9b4ec324a8f8315fd169227e5 Mon Sep 17 00:00:00 2001
From: csukuangfj
Date: Tue, 10 Jan 2023 08:05:51 +0000
Subject: [PATCH] deploy: fcffa593f011bd3213af5af044eb3ce2ede666c1
---
_sources/faqs.rst.txt | 67 +++++++
_sources/index.rst.txt | 1 +
contributing/code-style.html | 1 +
contributing/doc.html | 1 +
contributing/how-to-create-a-recipe.html | 1 +
contributing/index.html | 1 +
faqs.html | 172 ++++++++++++++++++
genindex.html | 1 +
huggingface/index.html | 1 +
huggingface/pretrained-models.html | 1 +
huggingface/spaces.html | 1 +
index.html | 6 +
installation/index.html | 5 +-
model-export/export-model-state-dict.html | 1 +
model-export/export-ncnn.html | 1 +
model-export/export-onnx.html | 1 +
.../export-with-torch-jit-script.html | 1 +
model-export/export-with-torch-jit-trace.html | 1 +
model-export/index.html | 5 +-
objects.inv | Bin 1084 -> 1123 bytes
.../aishell/conformer_ctc.html | 1 +
recipes/Non-streaming-ASR/aishell/index.html | 1 +
.../aishell/stateless_transducer.html | 1 +
.../aishell/tdnn_lstm_ctc.html | 1 +
recipes/Non-streaming-ASR/index.html | 1 +
.../librispeech/conformer_ctc.html | 1 +
.../Non-streaming-ASR/librispeech/index.html | 1 +
.../pruned_transducer_stateless.html | 1 +
.../librispeech/tdnn_lstm_ctc.html | 1 +
.../librispeech/zipformer_ctc_blankskip.html | 1 +
.../librispeech/zipformer_mmi.html | 1 +
recipes/Non-streaming-ASR/timit/index.html | 1 +
.../timit/tdnn_ligru_ctc.html | 1 +
.../timit/tdnn_lstm_ctc.html | 1 +
recipes/Non-streaming-ASR/yesno/index.html | 1 +
recipes/Non-streaming-ASR/yesno/tdnn.html | 1 +
recipes/Streaming-ASR/index.html | 1 +
recipes/Streaming-ASR/introduction.html | 1 +
recipes/Streaming-ASR/librispeech/index.html | 1 +
.../lstm_pruned_stateless_transducer.html | 1 +
.../pruned_transducer_stateless.html | 1 +
.../librispeech/zipformer_transducer.html | 1 +
recipes/index.html | 1 +
search.html | 1 +
searchindex.js | 2 +-
45 files changed, 290 insertions(+), 5 deletions(-)
create mode 100644 _sources/faqs.rst.txt
create mode 100644 faqs.html
diff --git a/_sources/faqs.rst.txt b/_sources/faqs.rst.txt
new file mode 100644
index 000000000..c70ded431
--- /dev/null
+++ b/_sources/faqs.rst.txt
@@ -0,0 +1,67 @@
+Frequently Asked Questions (FAQs)
+=================================
+
+In this section, we collect issues reported by users and post the corresponding
+solutions.
+
+
+OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
+-----------------------------------------------------------------------------------
+
+One user is using the following code to install ``torch`` and ``torchaudio``:
+
+.. code-block:: bash
+
+ pip install \
+ torch==1.10.0+cu111 \
+ torchvision==0.11.0+cu111 \
+ torchaudio==0.10.0 \
+ -f https://download.pytorch.org/whl/torch_stable.html
+
+and it throws the following error when running ``tdnn/train.py``:
+
+.. code-block::
+
+ OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
+
+The fix is to specify the CUDA version while installing ``torchaudio``. That
+is, change ``torchaudio==0.10.0`` to ``torchaudio==0.10.0+cu111``. Therefore,
+the correct command is:
+
+.. code-block:: bash
+
+ pip install \
+ torch==1.10.0+cu111 \
+ torchvision==0.11.0+cu111 \
+ torchaudio==0.10.0+cu111 \
+ -f https://download.pytorch.org/whl/torch_stable.html
+
+AttributeError: module 'distutils' has no attribute 'version'
+-------------------------------------------------------------
+
+The error log is:
+
+.. code-block::
+
+ Traceback (most recent call last):
+ File "./tdnn/train.py", line 14, in
+ from asr_datamodule import YesNoAsrDataModule
+ File "/home/xxx/code/next-gen-kaldi/icefall/egs/yesno/ASR/tdnn/asr_datamodule.py", line 34, in
+ from icefall.dataset.datamodule import DataModule
+ File "/home/xxx/code/next-gen-kaldi/icefall/icefall/__init__.py", line 3, in
+ from . import (
+ File "/home/xxx/code/next-gen-kaldi/icefall/icefall/decode.py", line 23, in
+ from icefall.utils import add_eos, add_sos, get_texts
+ File "/home/xxx/code/next-gen-kaldi/icefall/icefall/utils.py", line 39, in
+ from torch.utils.tensorboard import SummaryWriter
+ File "/home/xxx/tool/miniconda3/envs/yyy/lib/python3.8/site-packages/torch/utils/tensorboard/__init__.py", line 4, in
+ LooseVersion = distutils.version.LooseVersion
+ AttributeError: module 'distutils' has no attribute 'version'
+
+The fix is:
+
+.. code-block:: bash
+
+ pip uninstall setuptools
+
+ pip install setuptools==58.0.4
diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt
index 4ea446259..8d76eb68b 100644
--- a/_sources/index.rst.txt
+++ b/_sources/index.rst.txt
@@ -21,6 +21,7 @@ speech recognition recipes using `k2 `_.
:caption: Contents:
installation/index
+ faqs
model-export/index
.. toctree::
diff --git a/contributing/code-style.html b/contributing/code-style.html
index db1a46ca6..5daabfe67 100644
--- a/contributing/code-style.html
+++ b/contributing/code-style.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/contributing/doc.html b/contributing/doc.html
index dcba822e3..0adeec498 100644
--- a/contributing/doc.html
+++ b/contributing/doc.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/contributing/how-to-create-a-recipe.html b/contributing/how-to-create-a-recipe.html
index 08dccf3d8..92d92d7ed 100644
--- a/contributing/how-to-create-a-recipe.html
+++ b/contributing/how-to-create-a-recipe.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/contributing/index.html b/contributing/index.html
index d46281c54..97cd4821d 100644
--- a/contributing/index.html
+++ b/contributing/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/faqs.html b/faqs.html
new file mode 100644
index 000000000..b59195ae3
--- /dev/null
+++ b/faqs.html
@@ -0,0 +1,172 @@
+
+
+
+
+
+
+ Frequently Asked Questions (FAQs) — icefall 0.1 documentation
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ icefall
+
+
+
+
+
+
+
+
+
+Frequently Asked Questions (FAQs)
+In this section, we collect issues reported by users and post the corresponding
+solutions.
+
+OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
+One user is using the following code to install torch
and torchaudio
:
+ pip install \
+ torch == 1 .10.0+cu111 \
+ torchvision == 0 .11.0+cu111 \
+ torchaudio == 0 .10.0 \
+ -f https://download.pytorch.org/whl/torch_stable.html
+
+
+and it throws the following error when running tdnn/train.py
:
+OSError : libtorch_hip . so : cannot open shared object file : no such file or directory
+
+
+The fix is to specify the CUDA version while installing torchaudio
. That
+is, change torchaudio==0.10.0
to torchaudio==0.10.0+cu111
. Therefore,
+the correct command is:
+ pip install \
+ torch == 1 .10.0+cu111 \
+ torchvision == 0 .11.0+cu111 \
+ torchaudio == 0 .10.0+cu111 \
+ -f https://download.pytorch.org/whl/torch_stable.html
+
+
+
+
+AttributeError: module ‘distutils’ has no attribute ‘version’
+The error log is:
+Traceback ( most recent call last ):
+ File "./tdnn/train.py" , line 14 , in < module >
+ from asr_datamodule import YesNoAsrDataModule
+ File "/home/xxx/code/next-gen-kaldi/icefall/egs/yesno/ASR/tdnn/asr_datamodule.py" , line 34 , in < module >
+ from icefall.dataset.datamodule import DataModule
+ File "/home/xxx/code/next-gen-kaldi/icefall/icefall/__init__.py" , line 3 , in < module >
+ from . import (
+ File "/home/xxx/code/next-gen-kaldi/icefall/icefall/decode.py" , line 23 , in < module >
+ from icefall.utils import add_eos , add_sos , get_texts
+ File "/home/xxx/code/next-gen-kaldi/icefall/icefall/utils.py" , line 39 , in < module >
+ from torch.utils.tensorboard import SummaryWriter
+ File "/home/xxx/tool/miniconda3/envs/yyy/lib/python3.8/site-packages/torch/utils/tensorboard/__init__.py" , line 4 , in < module >
+ LooseVersion = distutils . version . LooseVersion
+AttributeError : module 'distutils' has no attribute 'version'
+
+
+The fix is:
+ pip uninstall setuptools
+
+pip install setuptools == 58 .0.4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/genindex.html b/genindex.html
index 9dfae8b5a..7f7a172e9 100644
--- a/genindex.html
+++ b/genindex.html
@@ -39,6 +39,7 @@
Contents:
diff --git a/huggingface/index.html b/huggingface/index.html
index db2b38ba8..40ac4ed56 100644
--- a/huggingface/index.html
+++ b/huggingface/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/huggingface/pretrained-models.html b/huggingface/pretrained-models.html
index 55d1a8b55..3d53347ca 100644
--- a/huggingface/pretrained-models.html
+++ b/huggingface/pretrained-models.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/huggingface/spaces.html b/huggingface/spaces.html
index eadb9f604..1d4bf7f24 100644
--- a/huggingface/spaces.html
+++ b/huggingface/spaces.html
@@ -41,6 +41,7 @@
Contents:
@@ -563,7 +564,7 @@ the following YouTube channel by
Previous
- Next
+ Next
diff --git a/model-export/export-model-state-dict.html b/model-export/export-model-state-dict.html
index 84dafe496..cbdce5a06 100644
--- a/model-export/export-model-state-dict.html
+++ b/model-export/export-model-state-dict.html
@@ -42,6 +42,7 @@
Contents:
Installation
+Frequently Asked Questions (FAQs)
Model export
Export model.state_dict()
When to use it
diff --git a/model-export/export-ncnn.html b/model-export/export-ncnn.html
index 0da06c897..700fa9bb4 100644
--- a/model-export/export-ncnn.html
+++ b/model-export/export-ncnn.html
@@ -42,6 +42,7 @@
Contents:
Installation
+Frequently Asked Questions (FAQs)
Model export
Export model.state_dict()
Export model with torch.jit.trace()
diff --git a/model-export/export-onnx.html b/model-export/export-onnx.html
index 9d3a8ef42..d6f444670 100644
--- a/model-export/export-onnx.html
+++ b/model-export/export-onnx.html
@@ -42,6 +42,7 @@
Contents:
Installation
+Frequently Asked Questions (FAQs)
Model export
Export model.state_dict()
Export model with torch.jit.trace()
diff --git a/model-export/export-with-torch-jit-script.html b/model-export/export-with-torch-jit-script.html
index ae0ee3fec..9610dc964 100644
--- a/model-export/export-with-torch-jit-script.html
+++ b/model-export/export-with-torch-jit-script.html
@@ -42,6 +42,7 @@
Contents:
Installation
+Frequently Asked Questions (FAQs)
Model export
Export model.state_dict()
Export model with torch.jit.trace()
diff --git a/model-export/export-with-torch-jit-trace.html b/model-export/export-with-torch-jit-trace.html
index 9215ca393..2be6b1350 100644
--- a/model-export/export-with-torch-jit-trace.html
+++ b/model-export/export-with-torch-jit-trace.html
@@ -42,6 +42,7 @@
Contents:
Installation
+Frequently Asked Questions (FAQs)
Model export
Export model.state_dict()
Export model with torch.jit.trace()
diff --git a/model-export/index.html b/model-export/index.html
index b8205f632..fd8e45fdd 100644
--- a/model-export/index.html
+++ b/model-export/index.html
@@ -21,7 +21,7 @@
-
+
@@ -42,6 +42,7 @@
Contents:
Installation
+Frequently Asked Questions (FAQs)
Model export
Export model.state_dict()
Export model with torch.jit.trace()
@@ -122,7 +123,7 @@
diff --git a/objects.inv b/objects.inv
index 151d942ddfb0bd434f3785348043ceae78d8fe7a..5e51b58fcdb0c57e9d5ccc1f6573a9092a3af288 100644
GIT binary patch
delta 1016
zcmV)94r~_Z#<3zL(0ft#4dJR#RB#`!7JtuJ30!uhPy`paBPDI8
z8HdAd)lZtbK-Px}WPN@)$}93RiEg~(C1m8K6`jHj^n~F@(P3V4Zcte9K$ym{#pFC!
zP`-gAe4XFV8GY!eYzZmrIg_f{v|Z0hwUN*Y!iba`)fHS;eMve~clHK-FvXB5hM*YI
z#e53SRMpUiu79lYzf@sacC=MK9~)@1=0kEYEm&4ZA`^K9kykeI?H^}Zz)DJzOu}Tv
za4>;D;j#E3-502z2laO3M-M7vJ9I6dNQJ)iL~6xJ^!E1pi-g>ezJ1E)zK@yB$B>Vm
zuPgl3YzaS#?Et^GTE+|gZ&vr)EmV7i^^Ns8at>NQnt#3gwj1R;Hhkc_R(e8;$=rE)=I;%ymM@K?sU(N2(g2;(qy&eYK{9yUxwGeAPYg7cS4
zfI8+B@PF^s>T4D~VJ=me9!qgUDR#;v@;FYWsJY}Jj&wHmTI8jHv~~><7#Zyj?;VGn
z9>$Ix9**}y+TBjog8SL~U27#cXw%)f3My;D7>qy;dr(8VO{u({KQZ;Nt6CGfS>Hrd
zAN4UZ#a0_k5p_cQaa!5Q9UbL+4_Y5Q5tLNL`G3Zo8Il|CrCJi}_p25DoUfM`8lTW`
zr_ta8FI2%f%TMiZ)cG=-EjaUdm#*SYNDQ-zMF5eY_e%nXVA%$NPUSD8YqDQeq%6
zO@likxkzcOlVB@-BXT7!R?rkv3g~!eCGH~fxVfHB&Ghg*KMhwM94AVBN{#_PM(o2<2FZCJH|Yrg
delta 977
zcmV;?11|jI2)qc8d4C%r5WVlOKynZ5fmprv)}~*UYB$QdQre5P!Ng0$!fL@v(qG>J
z);4%q8XveY~4SW2&W;)Kji>vueAopzD=_R!~lrTIsG5YP}`2vvnu0&?j3A*9QaIQ^
zpl}z{Xjt}?#wRWxLq7K2khtM9P-f<9QExXJsCEY&hZ-9=d%YgaL4Mm&
zSS$mbxJIintU;i|z3M3L?XHz7fLASij$%6RfFN@AVt=qUzY~PsRh&Z%$eVM--0!e?
zz0*ztuNjlxj(15jFRmCSg%qNYAw*1y&Su|5yk?>s%QpNHvDBpByLZ
zn3wvA8Lcr^LAmsGmXz^xYuZKqW$3of%8NK|5)=3aes}b=LyV*$yoCs%~kzo%%wFa*6h!6
z2dx~Ab@@zAfbDj56MzD3_W?JFgr6bD(exup;`B%LhqZ?%zPja=xL5bA&F3=$sohlKK9Z6oD>-a^WRq4wMs
z#t@FDW2ASZn(>rz?M#KD6`bnBW3;pNuS$gTUsW4g9F4%OGXEO2$G7MB|2!GPabj%P
z+A!EK)WH?@>bQ$#aBsJFiXt47|B#Contents:
diff --git a/recipes/Non-streaming-ASR/aishell/index.html b/recipes/Non-streaming-ASR/aishell/index.html
index 4101b9369..9660041ce 100644
--- a/recipes/Non-streaming-ASR/aishell/index.html
+++ b/recipes/Non-streaming-ASR/aishell/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/aishell/stateless_transducer.html b/recipes/Non-streaming-ASR/aishell/stateless_transducer.html
index 8f6aaa22a..5428bdd0a 100644
--- a/recipes/Non-streaming-ASR/aishell/stateless_transducer.html
+++ b/recipes/Non-streaming-ASR/aishell/stateless_transducer.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.html b/recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.html
index d3e88a5d0..3aaa61ac0 100644
--- a/recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.html
+++ b/recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/index.html b/recipes/Non-streaming-ASR/index.html
index 06960dab8..3013996d8 100644
--- a/recipes/Non-streaming-ASR/index.html
+++ b/recipes/Non-streaming-ASR/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/librispeech/conformer_ctc.html b/recipes/Non-streaming-ASR/librispeech/conformer_ctc.html
index 6eb1974b2..b445b72a8 100644
--- a/recipes/Non-streaming-ASR/librispeech/conformer_ctc.html
+++ b/recipes/Non-streaming-ASR/librispeech/conformer_ctc.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/librispeech/index.html b/recipes/Non-streaming-ASR/librispeech/index.html
index c615f1629..54992b6ac 100644
--- a/recipes/Non-streaming-ASR/librispeech/index.html
+++ b/recipes/Non-streaming-ASR/librispeech/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.html b/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.html
index 8a6f56c24..ed98e07bd 100644
--- a/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.html
+++ b/recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.html b/recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.html
index 8c0ba4f72..ebe4fd7c4 100644
--- a/recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.html
+++ b/recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.html b/recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.html
index f7de0c5de..2b1e4b0db 100644
--- a/recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.html
+++ b/recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/librispeech/zipformer_mmi.html b/recipes/Non-streaming-ASR/librispeech/zipformer_mmi.html
index 737ae09a0..e1e4bc490 100644
--- a/recipes/Non-streaming-ASR/librispeech/zipformer_mmi.html
+++ b/recipes/Non-streaming-ASR/librispeech/zipformer_mmi.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/timit/index.html b/recipes/Non-streaming-ASR/timit/index.html
index 470e849d1..a266c8585 100644
--- a/recipes/Non-streaming-ASR/timit/index.html
+++ b/recipes/Non-streaming-ASR/timit/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.html b/recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.html
index 75fe5b92a..86b46c235 100644
--- a/recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.html
+++ b/recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.html b/recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.html
index 9a6d74c7e..1c24e6c57 100644
--- a/recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.html
+++ b/recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/yesno/index.html b/recipes/Non-streaming-ASR/yesno/index.html
index 4adb2d7fa..6e017f36a 100644
--- a/recipes/Non-streaming-ASR/yesno/index.html
+++ b/recipes/Non-streaming-ASR/yesno/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Non-streaming-ASR/yesno/tdnn.html b/recipes/Non-streaming-ASR/yesno/tdnn.html
index 22c4fbb45..75a2fc013 100644
--- a/recipes/Non-streaming-ASR/yesno/tdnn.html
+++ b/recipes/Non-streaming-ASR/yesno/tdnn.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Streaming-ASR/index.html b/recipes/Streaming-ASR/index.html
index b6152166e..61c5d9ba5 100644
--- a/recipes/Streaming-ASR/index.html
+++ b/recipes/Streaming-ASR/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Streaming-ASR/introduction.html b/recipes/Streaming-ASR/introduction.html
index 3eff11485..7d7817b28 100644
--- a/recipes/Streaming-ASR/introduction.html
+++ b/recipes/Streaming-ASR/introduction.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Streaming-ASR/librispeech/index.html b/recipes/Streaming-ASR/librispeech/index.html
index 5fd2cc844..474130241 100644
--- a/recipes/Streaming-ASR/librispeech/index.html
+++ b/recipes/Streaming-ASR/librispeech/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.html b/recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.html
index 491d0aedf..1ef4f377d 100644
--- a/recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.html
+++ b/recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.html b/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.html
index 59d36c03b..f763d5041 100644
--- a/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.html
+++ b/recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/Streaming-ASR/librispeech/zipformer_transducer.html b/recipes/Streaming-ASR/librispeech/zipformer_transducer.html
index 044cb4821..e2b1c6ca8 100644
--- a/recipes/Streaming-ASR/librispeech/zipformer_transducer.html
+++ b/recipes/Streaming-ASR/librispeech/zipformer_transducer.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/recipes/index.html b/recipes/index.html
index cdd125972..4e84bdb2b 100644
--- a/recipes/index.html
+++ b/recipes/index.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/search.html b/search.html
index fa996ebaf..3bfecf583 100644
--- a/search.html
+++ b/search.html
@@ -42,6 +42,7 @@
Contents:
diff --git a/searchindex.js b/searchindex.js
index 619465abd..9cbc52f3a 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-onnx.rst", 
"model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "Streaming ASR", 
"Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36, 37], "us": [0, 1, 2, 4, 6, 7, 8, 10, 14, 15, 16, 17, 18, 20, 23, 27, 28, 30, 32], "tool": [0, 34], "make": [0, 1, 3, 15, 17, 20, 32, 34], "consist": [0, 17, 22, 34, 35, 36], "possibl": [0, 2, 3, 8, 15, 20], "black": 0, "format": [0, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "flake8": 0, "check": [0, 20], "qualiti": [0, 16], "isort": 0, "sort": [0, 8], "import": [0, 35, 36], "The": [0, 1, 2, 6, 8, 9, 10, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "version": [0, 8, 9, 15, 17, 18, 20, 22, 23, 27, 28, 34, 35], "abov": [0, 8, 9, 15, 16, 17, 18, 20, 22, 24, 25, 30, 32, 34, 35, 36], "ar": [0, 1, 3, 8, 9, 11, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36, 37], "22": [0, 20, 27, 28, 30], "3": [0, 7, 9, 18, 22, 23, 24, 25, 30, 34, 35, 36], "0": [0, 1, 7, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "5": [0, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "4": [0, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "10": [0, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "1": [0, 7, 9, 11, 12, 13, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "after": [0, 1, 6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "run": [0, 2, 6, 8, 11, 14, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "command": [0, 1, 8, 9, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "git": [0, 8, 9, 15, 17, 18, 20, 23, 27, 28, 30, 34], "clone": [0, 8, 9, 15, 17, 18, 20, 23, 27, 28, 30, 34], "http": [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "github": [0, 2, 5, 8, 9, 10, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "com": [0, 2, 5, 6, 8, 9, 10, 11, 12, 13, 15, 17, 
18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "k2": [0, 2, 5, 6, 7, 9, 10, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "fsa": [0, 2, 5, 6, 8, 9, 10, 11, 12, 13, 15, 17, 20, 22, 24, 25, 34, 35, 36], "icefal": [0, 2, 3, 5, 6, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36, 37], "cd": [0, 1, 2, 8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "pip": [0, 1, 8, 17, 34], "instal": [0, 1, 4, 6, 7, 9, 22, 24, 25, 30, 34, 35, 36], "pre": [0, 3, 4, 6, 7, 8], "commit": 0, "whenev": 0, "you": [0, 1, 2, 5, 6, 8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "automat": [0, 6], "hook": 0, "invok": 0, "fail": [0, 8], "If": [0, 2, 6, 8, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "ani": [0, 8, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "your": [0, 1, 2, 4, 6, 7, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "wa": [0, 8, 9, 20, 23], "success": [0, 8], "pleas": [0, 1, 2, 6, 8, 10, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "fix": [0, 8, 20], "issu": [0, 8, 20, 35, 36], "report": [0, 8], "some": [0, 1, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "i": [0, 1, 2, 6, 8, 9, 10, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "e": [0, 2, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "modifi": [0, 15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "file": [0, 2, 6, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "place": [0, 8, 9, 17, 20, 23], "so": [0, 6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "statu": 0, "failur": 0, "see": [0, 1, 6, 8, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "which": [0, 2, 6, 9, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 35, 36], "ha": [0, 2, 10, 11, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 32, 34, 35, 36], "been": [0, 10, 11, 17], "befor": [0, 1, 9, 12, 15, 17, 18, 20, 22, 24, 25, 
34, 35, 36], "further": 0, "chang": [0, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "all": [0, 5, 6, 9, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "again": [0, 30], "should": [0, 2, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "succe": 0, "thi": [0, 2, 3, 4, 8, 9, 11, 12, 13, 14, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36, 37], "time": [0, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "succeed": 0, "want": [0, 8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "can": [0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "do": [0, 2, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "Or": 0, "without": [0, 4, 6, 15, 20, 34], "your_changed_fil": 0, "py": [0, 2, 8, 11, 12, 13, 14, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 5, 6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "prepar": [1, 3, 9], "environ": [1, 15, 16, 17, 18, 20, 22, 23, 27, 28, 30, 34, 35, 36], "doc": [1, 9], "r": [1, 8, 27, 28], "requir": [1, 8, 35, 36], "txt": [1, 8, 15, 17, 18, 20, 23, 27, 28, 30], "set": [1, 8, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "up": [1, 8, 9, 15, 18, 20, 22, 23, 24, 25, 35, 36], "readi": [1, 15, 20], "refer": [1, 2, 8, 9, 10, 12, 13, 15, 17, 18, 20, 22, 23, 24, 27, 28, 30, 32, 35, 36], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 8, 9, 15, 17, 20, 34], "local": [1, 8, 22, 24, 25, 34, 35, 36], "preview": 1, "what": [1, 2, 8, 17, 32], "look": [1, 2, 5, 8, 15, 17, 18, 20], "like": [1, 2, 6, 8, 15, 17, 18, 20, 22, 24, 25, 30, 32, 34, 35], "publish": [1, 9, 16], "html": [1, 2, 8, 12, 13, 22, 34, 35, 36], "gener": [1, 9, 11, 12, 13, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "view": [1, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "follow": [1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 
28, 30, 34, 35, 36], "python3": [1, 8], "m": [1, 17, 22, 24, 25, 27, 28, 34, 35, 36], "server": [1, 6, 8, 34], "It": [1, 2, 4, 8, 10, 11, 12, 13, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "print": [1, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "serv": [1, 22, 24, 25, 34, 35, 36], "port": [1, 22, 24, 25, 34, 35, 36], "8000": [1, 30], "open": [1, 9, 16, 17, 20], "browser": [1, 4, 6, 22, 24, 25, 34, 35, 36], "go": [1, 15, 17, 20, 22, 24, 25, 34, 35, 36], "read": [2, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "code": [2, 3, 7, 15, 20, 22, 23, 27, 28, 30, 35, 36], "style": [2, 3, 7], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 8, 9, 12, 13, 15, 17, 20, 22, 24, 25, 34, 35, 36], "recommend": [2, 8, 15, 17, 18, 20, 22, 35, 36], "test": [2, 7, 9, 10, 11, 15, 17, 18, 20, 23, 24, 27, 28], "valid": [2, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "dataset": [2, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "lhots": [2, 7, 9, 15, 17, 20], "readthedoc": [2, 8], "io": [2, 8, 12, 13, 22, 34, 35, 36], "en": [2, 8], "latest": [2, 6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "index": [2, 8, 12, 13, 34, 35, 36], "yesno": [2, 7, 8, 19, 30, 37], "veri": [2, 3, 17, 27, 28, 30, 35, 36], "good": 2, "exampl": [2, 6, 7, 9, 11, 12, 13, 23, 27, 28, 30], "speech": [2, 6, 7, 8, 10, 11, 16, 17, 30, 37], "pull": [2, 15, 17, 20, 32], "380": [2, 28], "show": [2, 6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "add": [2, 15, 17, 18, 35, 37], "new": [2, 3, 6, 8, 15, 16, 17, 18, 20, 22, 23, 24, 25, 30, 34, 35, 36], "suppos": [2, 35, 36], "would": [2, 8, 9, 20, 23, 35, 36], "name": [2, 9, 15, 17, 22, 24, 25, 35, 36], "foo": [2, 11, 13, 15, 20, 22, 24, 25, 34, 35, 36], "eg": [2, 5, 8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "mkdir": [2, 15, 17, 18, 20, 23, 27, 28, 30, 34], "p": [2, 8, 17, 27, 28, 34], "asr": [2, 5, 7, 8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 
24, 25, 27, 28, 30, 32, 34, 35, 36, 37], "touch": 2, "sh": [2, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "chmod": 2, "x": [2, 32], "simpl": [2, 17], "own": [2, 22, 35, 36], "otherwis": [2, 15, 17, 20, 22, 24, 25, 34, 35, 36], "librispeech": [2, 5, 7, 9, 11, 12, 13, 19, 20, 22, 23, 24, 25, 31, 32, 34, 35, 36, 37], "assum": [2, 8, 9, 15, 17, 18, 20, 22, 23, 27, 28, 30, 34, 35, 36], "fanci": 2, "call": 2, "bar": [2, 11, 13, 15, 20, 22, 24, 25, 34, 35, 36], "organ": 2, "wai": [2, 3, 14, 22, 24, 25, 32, 34, 35, 36], "readm": [2, 15, 17, 18, 20, 23, 27, 28, 30], "md": [2, 5, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "asr_datamodul": [2, 8], "pretrain": [2, 9, 11, 13, 15, 17, 18, 20, 23, 27, 28, 30], "For": [2, 5, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "instanc": [2, 5, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "tdnn": [2, 8, 16, 19, 21, 26, 29], "its": [2, 9, 13, 17, 24], "directori": [2, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "structur": 2, "descript": [2, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "contain": [2, 7, 9, 10, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36, 37], "inform": [2, 9, 15, 17, 18, 20, 22, 23, 24, 27, 28, 30, 32, 34, 35, 36], "g": [2, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "wer": [2, 8, 9, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "etc": [2, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "provid": [2, 6, 8, 9, 10, 11, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36, 37], "pytorch": [2, 7, 17], "dataload": [2, 8], "take": [2, 9, 22, 30, 35, 36], "input": [2, 9, 15, 17, 18, 20, 23, 27, 28, 30, 32], "checkpoint": [2, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "save": [2, 8, 9, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "dure": [2, 6, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "stage": [2, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 
34, 35, 36], "": [2, 8, 9, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "definit": 2, "neural": [2, 15, 20], "network": [2, 15, 17, 20, 22, 24, 25, 34, 35, 36], "script": [2, 7, 8, 13, 14, 15, 17, 18, 20, 23, 27, 28, 30, 34], "infer": [2, 9, 11], "tdnn_lstm_ctc": [2, 18, 23, 28], "conformer_ctc": [2, 15, 20], "get": [2, 6, 8, 15, 17, 18, 20, 22, 23, 24, 25, 30, 34, 35, 36], "feel": [2, 34], "result": [2, 5, 6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "everi": [2, 9, 22, 24, 25, 34, 35, 36], "kept": [2, 22, 35, 36], "self": [2, 10, 32], "toler": 2, "duplic": 2, "among": [2, 8], "differ": [2, 8, 15, 16, 20, 22, 32, 34, 35, 36], "invoc": 2, "help": [2, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "blob": [2, 5, 9, 13, 22, 24, 25, 34, 35, 36], "master": [2, 5, 9, 11, 12, 13, 17, 22, 24, 25, 34, 35, 36], "transform": [2, 15, 20, 34], "conform": [2, 11, 12, 16, 17, 19, 21, 22, 24, 34, 35, 36], "base": [2, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "lstm": [2, 10, 13, 16, 19, 21, 26, 31, 33], "attent": [2, 17, 18, 32, 35, 36], "lm": [2, 8, 17, 22, 23, 27, 28, 30, 35, 36], "rescor": [2, 18, 23, 25, 27, 28, 30], "demonstr": [2, 4, 6, 9], "consid": 2, "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "mani": [3, 35, 36], "two": [3, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "them": [3, 4, 5, 6, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "To": [3, 6, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "document": [3, 7, 9, 25], "repositori": 3, "recip": [3, 5, 7, 8, 9, 15, 17, 18, 20, 22, 23, 27, 28, 30, 32, 34, 35, 36], "In": [3, 6, 8, 9, 11, 12, 13, 14, 15, 17, 18, 20, 23, 27, 28, 30, 32], "page": [3, 6, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36, 37], "describ": [3, 4, 9, 11, 12, 13, 14, 15, 17, 18, 20, 22, 23, 27, 28, 35, 36], "how": [3, 4, 6, 7, 8, 14, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "creat": 
[3, 7, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35], "data": [3, 9, 11, 12, 13, 16], "train": [3, 4, 6, 7, 9, 12, 13, 32], "decod": [3, 6, 11, 13, 14], "model": [3, 4, 6, 7, 8, 10, 32], "section": [4, 8, 9, 11, 12, 13, 14, 15, 20], "find": [4, 5, 6, 8, 9, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "also": [4, 5, 8, 9, 10, 11, 13, 15, 17, 18, 20, 22, 24, 25, 30, 32, 34, 35, 36], "try": [4, 6, 22, 24, 25, 34, 35, 36], "from": [4, 6, 8, 9, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "within": [4, 6], "anyth": [4, 6], "space": [4, 7], "youtub": [4, 7, 20, 22, 23, 24, 25, 34, 35, 36], "video": [4, 7, 20, 22, 23, 24, 25, 34, 35, 36], "upload": [5, 6, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "huggingfac": [5, 7, 9, 15, 17, 18, 20, 23, 24, 25, 27, 28, 30, 34], "co": [5, 6, 9, 15, 16, 17, 18, 20, 23, 24, 25, 27, 28, 30, 34], "visit": [5, 6, 22, 24, 25, 34, 35, 36], "link": [5, 8, 9, 10, 22, 24, 25, 34, 35, 36], "search": [5, 6], "specif": [5, 17], "correspond": [5, 6], "aishel": [5, 7, 15, 17, 18, 19, 37], "gigaspeech": [5, 12, 34], "wenetspeech": [5, 12], "integr": 6, "framework": [6, 11, 22, 35], "sherpa": [6, 10, 11, 12, 13, 34], "need": [6, 8, 9, 10, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "download": [6, 7, 16], "window": [6, 10, 11, 34], "maco": [6, 10, 11, 34], "linux": [6, 10, 11, 34], "even": [6, 8], "ipad": 6, "phone": 6, "start": [6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "address": [6, 9, 17, 22, 25, 34, 35, 36], "recognit": [6, 7, 10, 11, 16, 17, 30, 37], "screenshot": [6, 15, 17, 18, 20, 22, 30, 34, 35], "select": [6, 22, 23, 27, 28, 30, 34, 35, 36], "languag": [6, 15, 17, 18], "current": [6, 8, 17, 32, 34, 35, 36, 37], "chines": [6, 16, 17], "english": [6, 30, 34], "target": 6, "method": [6, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "greedi": 6, "modified_beam_search": [6, 17, 22, 24, 34, 35, 36], "choos": [6, 8, 22, 24, 25, 34, 35, 
36], "number": [6, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "activ": 6, "path": [6, 9, 11, 13, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "either": [6, 15, 17, 18, 20, 35, 36], "record": [6, 15, 16, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "click": [6, 8, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "button": 6, "submit": 6, "wait": 6, "moment": 6, "an": [6, 8, 9, 11, 12, 13, 15, 16, 17, 20, 22, 25, 30, 34, 35, 36], "when": [6, 14, 17, 20, 22, 24, 25, 35, 36], "bottom": [6, 22, 24, 25, 34, 35, 36], "part": [6, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "tabl": 6, "one": [6, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "next": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "gen": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "kaldi": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "subscrib": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "channel": [6, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "nadira": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "povei": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "www": [6, 8, 16, 20, 22, 23, 24, 25, 34, 35, 36], "uc_vaumpkminz1pnkfxan9mw": [6, 8, 20, 22, 23, 24, 25, 34, 35, 36], "torchaudio": [7, 32], "2": [7, 9, 11, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "export": [7, 8, 15, 17, 18, 20, 23, 27, 28, 30], "state_dict": [7, 14, 15, 17, 18, 20, 23, 27, 28, 30], "torch": [7, 8, 9, 14, 15, 17, 20], "jit": [7, 14, 20], "trace": [7, 12, 14], "onnx": [7, 9, 14], "ncnn": [7, 14], "non": [7, 11, 20, 32, 35, 37], "stream": [7, 11, 15, 20, 27, 28, 34, 37], "timit": [7, 19, 27, 28, 37], "introduct": [7, 31, 37], "contribut": 7, "depend": [8, 15, 20, 34], "step": [8, 9, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "order": [8, 15, 18, 20, 23, 27, 28], "matter": 8, "org": [8, 16, 17, 22, 34, 35, 36], "least": 8, "v1": [8, 15, 18, 20, 23, 27, 28], "9": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 30, 34, 35, 36], "alreadi": [8, 9], "don": [8, 12, 15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], 
"t": [8, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "replac": 8, "compil": [8, 15, 17, 20], "against": 8, "strongli": 8, "collect": 8, "variabl": [8, 15, 18, 20, 22, 24, 25, 34, 35, 36], "pythonpath": [8, 34], "point": [8, 9, 15, 18, 20, 22, 24, 25, 34, 35, 36], "folder": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "tmp": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "setup": [8, 15, 17, 18, 20, 22, 23, 27, 28, 30, 34, 35, 36], "put": [8, 24, 35], "sever": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "same": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "switch": [8, 15, 20, 25], "just": [8, 32], "about": [8, 17, 22, 25, 34, 35, 36], "virtualenv": 8, "8": [8, 9, 15, 17, 20, 22, 23, 24, 25, 30, 34, 35, 36], "cpython3": 8, "6": [8, 15, 17, 20, 22, 23, 27, 28, 34], "final": [8, 9, 20, 23], "64": [8, 9, 17, 35], "1540m": 8, "creator": 8, "cpython3posix": 8, "dest": 8, "ceph": [8, 9, 15, 17, 20], "fj": [8, 9, 17, 20], "fangjun": [8, 9, 17, 20], "clear": 8, "fals": [8, 9, 15, 17, 20], "no_vcs_ignor": 8, "global": 8, "seeder": 8, "fromappdata": 8, "bundl": 8, "setuptool": 8, "wheel": [8, 34], "via": [8, 12, 13], "copi": [8, 32], "app_data_dir": 8, "root": 8, "share": 8, "v": [8, 20, 27, 28], "irtualenv": 8, "ad": [8, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "seed": 8, "packag": 8, "21": [8, 9, 15, 17, 20, 27, 28], "57": [8, 20, 23], "36": [8, 17, 20], "bashactiv": 8, "cshellactiv": 8, "fishactiv": 8, "powershellactiv": 8, "pythonactiv": 8, "xonshactiv": 8, "sourc": [8, 9, 15, 16, 17, 20], "bin": [8, 15, 20, 34], "dev20210822": 8, "cpu": [8, 9, 12, 15, 22, 24, 25, 30, 35, 36], "torch1": 8, "f": [8, 27, 28], "nightli": 8, "whl": 8, "2bcpu": 8, "cp38": 8, "linux_x86_64": 8, "mb": 8, "________________________________": 8, "185": [8, 15, 20, 30], "kb": [8, 27, 28], "graphviz": 8, "17": [8, 9, 15, 20, 27, 28, 34], "py3": 8, "none": [8, 15, 20], "18": [8, 15, 17, 18, 20, 22, 23, 
27, 28, 34, 35, 36], "cach": 8, "manylinux1_x86_64": 8, "831": [8, 17, 28], "type": [8, 9, 15, 17, 20, 22, 24, 25, 30, 32, 34, 35, 36], "extens": 8, "typing_extens": 8, "26": [8, 17, 20, 28], "successfulli": 8, "probabl": [8, 17, 22, 24, 34, 35, 36], "cuda": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "req": 8, "7b1b76ge": 8, "q": 8, "audioread": 8, "soundfil": 8, "post1": 8, "py2": 8, "7": [8, 9, 15, 18, 20, 22, 23, 27, 28, 34, 35], "97": [8, 15], "cytoolz": 8, "11": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "manylinux_2_17_x86_64": 8, "manylinux2014_x86_64": 8, "dataclass": 8, "14": [8, 9, 11, 12, 15, 20, 22, 23, 24, 27, 34, 35, 36], "h5py": 8, "manylinux_2_12_x86_64": 8, "manylinux2010_x86_64": 8, "684": [8, 15, 30], "intervaltre": 8, "lilcom": 8, "numpi": 8, "15": [8, 9, 17, 18, 20, 27, 30], "40": [8, 18, 20, 23, 27, 28], "pyyaml": 8, "662": 8, "tqdm": 8, "62": [8, 20, 23], "76": [8, 30], "73": 8, "satisfi": 8, "lib": 8, "site": 8, "dev": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "2a1410b": 8, "clean": [8, 15, 17, 20, 22, 23, 24, 25, 34, 35, 36], "toolz": 8, "55": [8, 18, 20, 27], "sortedcontain": 8, "29": [8, 15, 17, 18, 20, 23, 27, 28], "cffi": 8, "411": [8, 20], "pycpars": 8, "20": [8, 9, 15, 17, 18, 20, 22, 23, 27, 28, 30, 35], "112": 8, "pypars": 8, "67": 8, "done": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "filenam": [8, 11, 12, 13, 24, 25, 34, 36], "dev_2a1410b_clean": 8, "size": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "342242": 8, "sha256": 8, "f683444afa4dc0881133206b4646a": 8, "9d0f774224cc84000f55d0a67f6e4a37997": 8, "store": [8, 20], "ephem": 8, "ftu0qysz": 8, "7f": 8, "7a": 8, "8e": 8, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 8, "warn": 8, "built": 8, "invalid": [8, 20], "metadata": [8, 27, 28], "mandat": 8, "pep": 8, "440": 8, "packa": 8, "ging": 8, "deprec": [8, 17], "legaci": 8, "becaus": 8, "could": [8, 15, 18], "A": [8, 9, 15, 
17, 18, 20, 22, 23, 24, 25, 34, 35, 36], "discuss": 8, "regard": 8, "pypa": 8, "sue": 8, "8368": 8, "inter": 8, "valtre": 8, "sor": 8, "tedcontain": 8, "remot": 8, "enumer": 8, "object": [8, 15, 17, 18, 22, 30, 34, 35], "500": [8, 9, 17, 20, 25, 34], "count": 8, "100": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "compress": 8, "308": [8, 15, 17, 18], "total": [8, 15, 17, 18, 20, 22, 23, 30, 34, 35], "delta": 8, "263": 8, "reus": 8, "307": 8, "102": [8, 15], "pack": [8, 35, 36], "receiv": 8, "172": 8, "49": [8, 20, 28, 30], "kib": 8, "385": 8, "00": [8, 15, 17, 18, 20, 23, 27, 28, 30], "resolv": 8, "kaldilm": 8, "tar": 8, "gz": 8, "48": [8, 15, 17], "574": 8, "kaldialign": 8, "sentencepiec": [8, 20], "96": 8, "tensorboard": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "41": [8, 15, 17, 27, 30], "line": [8, 22, 35, 36], "absl": 8, "absl_pi": 8, "13": [8, 9, 17, 18, 20, 23, 24, 27], "132": 8, "googl": [8, 22, 24, 25, 34, 35, 36], "auth": 8, "oauthlib": 8, "google_auth_oauthlib": 8, "grpcio": 8, "24": [8, 18, 23, 27, 28, 30], "39": [8, 17, 20, 23, 27], "ment": 8, "12": [8, 9, 15, 17, 18, 20, 22, 24, 25, 27, 30, 34, 35, 36], "requi": 8, "rement": 8, "protobuf": 8, "manylinux_2_5_x86_64": 8, "werkzeug": 8, "288": 8, "tensorboard_data_serv": 8, "google_auth": 8, "35": [8, 9, 17, 20, 34], "152": 8, "request": [8, 32], "plugin": 8, "wit": 8, "tensorboard_plugin_wit": 8, "781": 8, "markdown": 8, "six": 8, "16": [8, 9, 13, 15, 17, 18, 20, 22, 23, 27, 28, 30, 34, 35, 36], "cachetool": 8, "rsa": 8, "34": 8, "pyasn1": 8, "modul": [8, 24, 35], "pyasn1_modul": 8, "155": 8, "requests_oauthlib": 8, "23": [8, 15, 17, 18, 20, 27, 28, 30], "77": [8, 20], "urllib3": 8, "27": [8, 15, 17, 23, 28], "138": [8, 15, 17], "certifi": 8, "2017": 8, "2021": [8, 15, 18, 20, 23, 27, 28, 30], "30": [8, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "145": 8, "charset": 8, "normal": [8, 23, 27, 28, 30, 35], "charset_norm": 8, "idna": 8, "59": [8, 18, 20], "146": 8, "897233": 8, 
"eccb906cafcd45bf9a7e1a1718e4534254bfb": 8, "f4c0d0cbc66eee6c88d68a63862": 8, "85": 8, "7d": 8, "63": [8, 17], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 8, "etool": 8, "oaut": 8, "hlib": 8, "let": [8, 15, 20, 34], "u": [8, 15, 17, 18, 20, 30, 34], "log": [8, 23, 27, 28, 30], "08": [8, 20, 23, 25, 27, 28, 30, 34], "19": [8, 9, 15, 20, 23, 27, 28], "main": [8, 15, 20, 32], "dl_dir": [8, 15, 18, 20, 22, 24, 25, 34, 35, 36], "waves_yesno": 8, "49mb": 8, "03": [8, 9, 17, 20, 27, 28, 34], "39mb": 8, "manifest": 8, "31": [8, 20], "42": [8, 15, 20, 30], "comput": [8, 9, 15, 17, 18, 22, 23, 25, 27, 28, 30, 34, 35, 36], "fbank": [8, 9, 15, 17, 18, 20, 23, 27, 28, 30], "32": [8, 15, 17, 18, 36], "803": 8, "info": [8, 9, 15, 17, 18, 20, 23, 27, 28, 30], "compute_fbank_yesno": 8, "52": [8, 15, 20], "process": [8, 9, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "extract": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "featur": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "_______________________________________________________________": 8, "90": 8, "01": [8, 17, 18, 20], "80": [8, 9, 15, 17, 20], "57it": 8, "085": 8, "______________________________________________________________": 8, "248": [8, 17], "21it": 8, "lang": [8, 9, 17, 20, 25], "fcordre9": 8, "kaldilm_6899d26f2d684ad48f21025950cd2866": 8, "csrc": [8, 20], "arpa_file_pars": 8, "cc": 8, "void": 8, "arpafilepars": 8, "rea": 8, "d": [8, 27, 28], "std": 8, "istream": 8, "79": 8, "140": [8, 18], "gram": [8, 15, 17, 18, 22, 23, 25, 27, 28, 35, 36], "89": [8, 15], "hlg": [8, 23, 27, 28, 30], "928": 8, "compile_hlg": 8, "120": 8, "lang_phon": [8, 18, 23, 27, 28, 30], "929": [8, 17], "lexicon": [8, 15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "116": 8, "convert": [8, 20, 34], "l": [8, 17, 27, 28, 30], "pt": [8, 9, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "linv": [8, 17, 20, 30], "931": 8, "ctc_topo": 8, "max_token_id": 8, "932": 8, "load": [8, 15, 17, 18, 20, 
22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "fst": [8, 17, 30], "intersect": [8, 22, 35, 36], "933": 8, "lg": [8, 22, 25, 35, 36], "shape": 8, "66": 8, "connect": [8, 9, 20, 22, 23, 34, 35, 36], "68": [8, 20], "70": 8, "class": [8, 20], "tensor": [8, 15, 17, 18, 20, 22, 30, 34, 35], "71": [8, 20, 23], "determin": 8, "934": 8, "74": [8, 9], "_k2": 8, "raggedint": 8, "remov": [8, 15, 17, 18, 20, 23, 27, 28], "disambigu": 8, "symbol": [8, 17, 22, 35, 36], "87": 8, "remove_epsilon": 8, "935": 8, "92": [8, 20], "arc": 8, "95": [8, 16], "compos": 8, "h": 8, "105": [8, 20], "936": 8, "107": [8, 23], "123": 8, "now": [8, 15, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "cuda_visible_devic": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "gpu": [8, 15, 17, 18, 20, 22, 24, 25, 27, 28, 30, 34, 35, 36], "avail": [8, 9, 15, 17, 20, 23, 27, 28, 30, 34], "case": [8, 9, 22, 24, 25, 34, 35, 36], "segment": 8, "fault": 8, "core": 8, "dump": 8, "error": [8, 20], "protocol_buffers_python_implement": 8, "more": [8, 15, 20, 30, 32, 34, 35], "674": 8, "interest": [8, 22, 24, 25, 34, 35, 36], "given": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 35, 36], "below": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35], "072": 8, "465": 8, "466": 8, "exp_dir": [8, 17, 20, 22, 24, 25, 35, 36], "posixpath": [8, 17, 20], "exp": [8, 9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "lang_dir": [8, 17, 20], "lr": [8, 17, 34], "feature_dim": [8, 9, 15, 17, 20, 30], "weight_decai": 8, "1e": 8, "06": [8, 9, 18, 20, 23, 30], "start_epoch": 8, "best_train_loss": [8, 9], "inf": [8, 9], "best_valid_loss": [8, 9], "best_train_epoch": [8, 9], "best_valid_epoch": [8, 9], "batch_idx_train": [8, 9], "log_interv": [8, 9], "valid_interv": [8, 9], "beam_siz": [8, 9, 17], "reduct": [8, 24], "sum": 8, "use_doub": 8, "le_scor": 8, "true": [8, 9, 15, 17, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "world_siz": 8, "master_port": 8, "12354": 8, "num_epoch": 8, "feature_dir": [8, 20], 
"max_dur": [8, 20], "bucketing_sampl": [8, 20], "num_bucket": [8, 20], "concatenate_cut": [8, 20], "duration_factor": [8, 20], "gap": [8, 20], "on_the_fly_feat": [8, 20], "shuffl": [8, 20], "return_cut": [8, 20], "num_work": [8, 20], "074": 8, "113": [8, 17, 20], "098": [8, 23], "cut": [8, 20], "240": [8, 15, 30], "149": [8, 20], "200": [8, 9, 15, 20, 27, 28, 30], "singlecutsampl": 8, "206": [8, 20], "219": [8, 17, 20], "246": [8, 17, 20, 27, 28], "357": 8, "416": 8, "epoch": [8, 9, 11, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "batch": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "avg": [8, 9, 11, 12, 13, 17, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "loss": [8, 15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "0789": 8, "848": 8, "5356": 8, "7556": 8, "301": [8, 9, 20], "432": [8, 20], "9972": 8, "best": [8, 15, 18, 20], "805": 8, "2436": 8, "5717": 8, "33": [8, 15, 16, 17, 20, 27], "109": [8, 15, 20], "4167": 8, "121": [8, 23], "325": 8, "2214": 8, "798": [8, 17], "0781": 8, "1343": 8, "065": 8, "0859": 8, "556": 8, "0421": 8, "0975": 8, "810": 8, "0431": 8, "824": 8, "657": 8, "0109": 8, "984": [8, 20], "0093": 8, "0096": 8, "50": [8, 9, 20, 22, 27, 34, 35, 36], "239": [8, 17], "0104": 8, "0101": 8, "569": 8, "0092": 8, "819": [8, 27], "835": 8, "51": [8, 15, 20, 30], "024": 8, "0105": 8, "317": 8, "0099": 8, "0097": 8, "552": 8, "0108": 8, "869": 8, "0102": 8, "126": [8, 20], "128": [8, 20], "537": [8, 20], "192": [8, 20], "249": 8, "250": [8, 17, 23], "lm_dir": [8, 20], "search_beam": [8, 15, 20, 30], "output_beam": [8, 15, 20, 30], "min_active_st": [8, 15, 20, 30], "max_active_st": [8, 15, 20, 30], "10000": [8, 15, 20, 30], "use_double_scor": [8, 15, 20, 30], "193": 8, "213": [8, 30], "259": [8, 15], "devic": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 35, 36], "217": [8, 15, 20], "279": [8, 20], "averag": [8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "userwarn": [8, 17], "floor_divid": 8, "futur": [8, 17, 
37], "round": [8, 17], "toward": [8, 17], "trunc": [8, 17], "function": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "NOT": [8, 15, 17, 20, 30], "floor": [8, 17], "incorrect": [8, 17], "neg": [8, 17], "valu": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "keep": [8, 17, 22, 35, 36], "behavior": [8, 17], "div": [8, 17], "b": [8, 17, 20, 27, 28], "rounding_mod": [8, 17], "actual": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "divis": [8, 17], "trigger": 8, "intern": 8, "aten": 8, "src": [8, 34], "nativ": 8, "binaryop": 8, "cpp": [8, 12], "450": [8, 15, 17, 18], "k": [8, 22, 27, 28, 34, 35, 36], "n": [8, 15, 22, 24, 25, 27, 28, 34, 35, 36], "220": [8, 17, 18, 20], "409": 8, "190": [8, 23], "until": [8, 20, 24], "571": [8, 20], "228": [8, 20], "transcript": [8, 15, 16, 17, 18, 20, 22, 23, 27, 28, 34, 35, 36], "recog": [8, 17, 20], "test_set": [8, 30], "572": 8, "util": [8, 20], "ins": [8, 20, 30], "del": [8, 20, 30], "sub": [8, 20, 30], "573": 8, "236": 8, "wrote": [8, 20], "detail": [8, 10, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "stat": [8, 20], "err": [8, 17, 20], "299": 8, "congratul": [8, 15, 18, 20, 23, 27, 28, 30], "first": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "fun": 8, "debug": 8, "variou": [8, 14, 37], "problem": 8, "mai": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36, 37], "encount": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "while": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "period": 9, "disk": 9, "optim": [9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "other": [9, 17, 20, 22, 23, 27, 28, 30, 32, 35, 36, 37], "relat": [9, 15, 17, 20, 23, 27, 28, 30], "resum": [9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "howev": 9, "onli": [9, 11, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36, 37], "strip": 9, "except": 9, "reduc": [9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "each": [9, 15, 17, 18, 20, 22, 24, 25, 32, 34, 35, 36], "well": [9, 30, 37], 
"usag": [9, 12, 13, 23, 27, 28, 30], "pruned_transducer_stateless3": [9, 11, 12, 32], "almost": [9, 22, 32, 35, 36], "dir": [9, 11, 12, 13, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "bpe": [9, 11, 12, 13, 20, 22, 24, 25, 34, 35, 36], "lang_bpe_500": [9, 11, 12, 13, 20, 22, 24, 25, 34, 35, 36], "dict": 9, "host": 9, "csukuangfj": [9, 15, 17, 18, 20, 23, 27, 28, 30, 34], "prune": [9, 17, 19, 21, 31, 32, 33, 34, 36], "transduc": [9, 10, 11, 16, 19, 21, 31, 32, 33], "stateless3": 9, "2022": [9, 17, 22, 24, 25, 34, 35], "05": [9, 15, 17, 18, 20, 28], "lf": [9, 15, 17, 18, 20, 23, 25, 27, 28, 30], "repo": 9, "prefix": 9, "those": 9, "xxx": 9, "wave": [9, 11, 15, 20], "iter": [9, 13, 22, 24, 25, 34, 35, 36], "1224000": 9, "greedy_search": [9, 17, 22, 24, 34, 35, 36], "test_wav": [9, 15, 17, 18, 20, 23, 27, 28, 30], "1089": [9, 20, 23], "134686": [9, 20, 23], "0001": [9, 20, 23], "wav": [9, 11, 13, 15, 17, 18, 20, 22, 24, 25, 27, 28, 30, 34, 35, 36], "1221": [9, 20, 23], "135766": [9, 20, 23], "0002": [9, 20, 23], "multipl": [9, 15, 17, 18, 20, 23, 27, 28, 30], "sound": [9, 13, 15, 17, 18, 20, 23, 27, 28, 30], "Its": [9, 20], "output": [9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "09": [9, 15, 17, 18, 20, 34], "02": [9, 17, 20, 22, 28, 34, 35], "233": 9, "265": 9, "reset_interv": 9, "3000": 9, "subsampling_factor": [9, 15, 17, 20], "encoder_dim": 9, "512": [9, 15, 17, 20], "nhead": [9, 15, 17, 20, 22, 35], "dim_feedforward": [9, 17], "2048": [9, 17], "num_encoder_lay": [9, 17], "decoder_dim": 9, "joiner_dim": 9, "model_warm_step": 9, "env_info": [9, 15, 17, 20], "releas": [9, 15, 17, 20, 34], "sha1": [9, 15, 17, 20], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 9, "date": [9, 15, 17, 20], "fri": 9, "oct": [9, 20], "miss": [9, 17, 20], "cu102": 9, "branch": [9, 15, 17, 20, 24], "1013": 9, "c39cba5": 9, "dirti": [9, 15, 20], "thu": [9, 17, 20, 23], "__init__": [9, 15, 17, 20], "jsonl": 9, "hostnam": [9, 17], "de": [9, 17], "74279": [9, 17], 
"0324160024": 9, "65bfd8b584": 9, "jjlbn": 9, "ip": [9, 17], "177": [9, 17, 18, 20], "203": [9, 20], "bpe_model": [9, 20], "sound_fil": [9, 15, 17, 20, 30], "sample_r": [9, 15, 17, 20, 30], "16000": [9, 15, 17, 18, 20, 23, 24, 27, 28], "beam": [9, 34], "max_context": 9, "max_stat": 9, "context_s": [9, 17], "max_sym_per_fram": [9, 17], "simulate_stream": 9, "decode_chunk_s": 9, "left_context": 9, "dynamic_chunk_train": 9, "causal_convolut": 9, "short_chunk_s": [9, 35, 36], "25": [9, 15, 20, 22, 27, 28, 30, 35], "num_left_chunk": 9, "blank_id": [9, 17], "unk_id": 9, "vocab_s": [9, 17], "271": 9, "273": [9, 17], "612": 9, "458": 9, "disabl": 9, "giga": [9, 34], "623": 9, "277": 9, "paramet": [9, 12, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "78648040": 9, "951": [9, 20], "285": [9, 17, 20], "construct": [9, 15, 17, 18, 20, 23, 27, 28, 30], "952": 9, "295": [9, 15, 17, 18, 20], "957": 9, "700": 9, "329": [9, 20], "912": 9, "388": 9, "earli": [9, 20, 23], "nightfal": [9, 20, 23], "THE": [9, 20, 23], "yellow": [9, 20, 23], "lamp": [9, 20, 23], "light": [9, 20, 23], "here": [9, 15, 17, 18, 20, 23, 32, 34, 35], "AND": [9, 20, 23], "THERE": [9, 20, 23], "squalid": [9, 20, 23], "quarter": [9, 20, 23], "OF": [9, 20, 23], "brothel": [9, 20, 23], "god": [9, 20, 23], "AS": [9, 20, 23], "direct": [9, 20, 23], "consequ": [9, 20, 23], "sin": [9, 20, 23], "man": [9, 20, 23], "punish": [9, 20, 23], "had": [9, 20, 23], "her": [9, 20, 23], "love": [9, 20, 23], "child": [9, 20, 23], "whose": [9, 17, 20, 23], "ON": [9, 20, 23, 34], "THAT": [9, 20, 23], "dishonor": [9, 20, 23], "bosom": [9, 20, 23], "TO": [9, 20, 23], "parent": [9, 20, 23], "forev": [9, 20, 23], "WITH": [9, 20, 23], "race": [9, 20, 23], "descent": [9, 20, 23], "mortal": [9, 20, 23], "BE": [9, 20, 23], "bless": [9, 20, 23], "soul": [9, 20, 23], "IN": [9, 20, 23], "heaven": [9, 20, 23], "yet": [9, 20, 23], "THESE": [9, 20, 23], "thought": [9, 20, 23], "affect": [9, 20, 23], "hester": [9, 20, 23], "prynn": [9, 20, 
23], "less": [9, 20, 23, 30, 35, 36], "hope": [9, 16, 20, 23], "than": [9, 15, 17, 18, 20, 22, 23, 24, 25, 30, 34, 35, 36], "apprehens": [9, 20, 23], "390": 9, "alwai": 9, "note": [9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "down": [9, 15, 20, 22, 24, 25, 34, 35, 36], "reproduc": [9, 20], "ln": [9, 15, 20, 22, 24, 25, 34, 35, 36], "9999": [9, 24, 25, 34], "symlink": 9, "pass": [9, 15, 17, 18, 20, 22, 24, 25, 32, 34, 35, 36], "max": [9, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "durat": [9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "600": [9, 20, 22, 24, 34, 35, 36], "reason": [9, 35], "support": [10, 11, 15, 17, 20, 22, 24, 25, 32, 34, 35, 36], "perform": [10, 11, 17, 35], "raspberri": [10, 34], "pi": [10, 34], "project": 10, "static": [10, 34], "produc": [10, 22, 24, 25, 34, 35, 36], "binari": [10, 15, 17, 18, 20, 22, 30, 34, 35], "everyth": 10, "tree": [11, 12, 13, 15, 17, 18, 20, 23, 27, 28, 30, 34], "insid": [11, 13], "encod": [11, 13, 15, 17, 18, 20, 22, 23, 24, 30, 32, 34, 35, 36], "joiner": [11, 13, 17, 22, 34, 35, 36], "joiner_encoder_proj": 11, "joiner_decoder_proj": 11, "onnx_pretrain": 11, "proj": 11, "baz": [11, 13], "onnxruntim": 11, "our": [12, 13, 20, 22, 32, 35, 36], "torchscript": [12, 13, 34], "cpu_jit": [12, 15, 20, 22, 24, 25, 35, 36], "confus": 12, "move": [12, 22, 24, 25, 35, 36], "why": 12, "streaming_asr": [12, 13, 34, 35, 36], "emform": 12, "conv_emform": 12, "offline_asr": [12, 22], "lstm_transducer_stateless2": [13, 34], "468000": [13, 34], "three": [13, 15, 17, 32], "encoder_jit_trac": [13, 34, 36], "decoder_jit_trac": [13, 34, 36], "joiner_jit_trac": [13, 34, 36], "jit_pretrain": [13, 24, 25, 34], "tutori": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "learn": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "singl": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "1best": [15, 18, 20, 23, 24, 25, 27, 28], "handl": [15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "automag": 
[15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "stop": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "control": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "By": [15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "default": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "execut": [15, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "mean": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 32, 34, 35, 36], "musan": [15, 18, 20, 22, 24, 25, 34, 35, 36], "sai": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "thei": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "won": [15, 18, 20, 22, 24, 25, 34, 35, 36], "re": [15, 18, 20, 22, 24, 25, 34, 35, 36], "intal": [15, 18], "initi": [15, 18], "sudo": [15, 18], "apt": [15, 18], "permiss": [15, 18], "commandlin": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "quit": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "often": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "experi": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "num": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "state": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "world": [15, 17, 18, 20, 22, 23, 24, 25, 34, 35, 36], "multi": [15, 17, 18, 20, 22, 24, 25, 32, 34, 35, 36], "machin": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "ddp": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "implement": [15, 17, 18, 20, 22, 24, 25, 32, 34, 35, 36], "present": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "later": [15, 18, 20, 22, 23, 24, 25, 27, 28, 34, 35, 36], "specifi": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "second": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "over": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "utter": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "pad": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "oom": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "v100": [15, 17, 18, 20], "nvidia": [15, 17, 18, 20], "due": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "usual": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "larger": [15, 17, 18, 20, 
22, 24, 25, 34, 35, 36], "caus": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "smaller": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "increas": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "tune": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "weight": [15, 18, 20, 24, 25, 34], "decai": [15, 18, 20, 24, 25, 34], "warmup": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "get_param": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "realli": [15, 18, 20, 22, 24, 25, 34, 35, 36], "directli": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "perturb": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "speed": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "factor": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "3x150": [15, 17, 18], "hour": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "These": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "rate": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "visual": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "logdir": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "labelsmooth": 15, "someth": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "tensorflow": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "found": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "continu": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "press": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "ctrl": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35, 36], "engw8ksktzqs24zbv5dgcg": 15, "22t11": 15, "scan": [15, 17, 18, 20, 22, 30, 34, 35], "116068": 15, "scalar": [15, 17, 18, 20, 22, 30, 34, 35], "listen": [15, 17, 18, 22, 30, 34, 35], "url": [15, 17, 18, 20, 22, 24, 25, 30, 34, 35], "xxxx": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "text": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "saw": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "consol": [15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "typic": [15, 17, 18, 20], "avoid": [15, 17, 20], "commonli": [15, 17, 18, 20, 23, 27, 28, 30], "nbest": [15, 20, 25], "scale": [15, 20, 
23, 25, 27, 28], "lattic": [15, 18, 20, 22, 23, 27, 28, 35, 36], "score": [15, 20, 22, 35, 36], "uniqu": [15, 20, 22, 35, 36], "pkufool": [15, 18, 23], "icefall_asr_aishell_conformer_ctc": 15, "transcrib": [15, 17, 18, 20], "lang_char": [15, 17], "token": [15, 17, 18, 20, 23, 27, 28, 30], "word": [15, 17, 18, 20, 23, 27, 28, 30], "bac009s0764w0121": [15, 17, 18], "bac009s0764w0122": [15, 17, 18], "bac009s0764w0123": [15, 17, 18], "tran": [15, 18, 20, 23, 27, 28], "graph": [15, 18, 20, 22, 23, 27, 28, 35, 36], "id": [15, 18, 20, 23, 27, 28], "conveni": [15, 18, 20], "eo": [15, 18, 20], "easili": [15, 18, 20], "obtain": [15, 17, 18, 20, 23, 27, 28], "84": 15, "list": [15, 17, 18, 20, 23, 27, 28], "soxi": [15, 17, 18, 20, 23, 30], "sampl": [15, 17, 18, 20, 23, 24, 30, 35, 36], "precis": [15, 17, 18, 20, 22, 23, 30, 35, 36], "bit": [15, 17, 18, 20, 23, 30], "04": [15, 17, 18, 20, 23, 27, 28], "67263": [15, 17, 18], "315": [15, 17, 18, 20, 23], "cdda": [15, 17, 18, 20, 23, 30], "sector": [15, 17, 18, 20, 23, 30], "135k": [15, 17, 18], "256k": [15, 17, 18, 20], "sign": [15, 17, 18, 20, 30], "integ": [15, 17, 18, 20, 30], "pcm": [15, 17, 18, 20, 30], "65840": [15, 17, 18], "625": [15, 17, 18], "132k": [15, 17, 18], "64000": [15, 17, 18], "300": [15, 17, 18, 20, 22, 35], "128k": [15, 17, 18, 30], "displai": [15, 17, 18, 20], "topologi": [15, 20], "07": [15, 17, 18, 20], "53": [15, 22, 23, 28, 34, 35], "707": [15, 20], "229": 15, "attention_dim": [15, 17, 20], "num_decoder_lay": [15, 20], "vgg_frontend": [15, 17, 20], "use_feat_batchnorm": [15, 20], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 15, "sun": 15, "sep": 15, "46": [15, 20], "33cfe45": 15, "d57a873": 15, "wed": [15, 17, 20], "nov": [15, 20], "hw": 15, "kangwei": 15, "icefall_aishell3": 15, "k2_releas": 15, "tokens_fil": 15, "words_fil": [15, 20, 30], "num_path": [15, 20, 22, 35, 36], "ngram_lm_scal": [15, 20], "attention_decoder_scal": [15, 20], "nbest_scal": [15, 20], "sos_id": [15, 20], "eos_id": [15, 20], 
"num_class": [15, 20, 30], "4336": [15, 17], "708": [15, 17, 20, 30], "242": [15, 20], "131": [15, 20], "134": 15, "269": [15, 27, 28], "275": 15, "241": 15, "293": [15, 20], "704": [15, 27], "369": [15, 20], "\u751a": [15, 17], "\u81f3": [15, 17], "\u51fa": [15, 17], "\u73b0": [15, 17], "\u4ea4": [15, 17], "\u6613": [15, 17], "\u51e0": [15, 17], "\u4e4e": [15, 17], "\u505c": [15, 17], "\u6b62": 15, "\u7684": [15, 17, 18], "\u60c5": [15, 17], "\u51b5": [15, 17], "\u4e00": [15, 17], "\u4e8c": [15, 17], "\u7ebf": [15, 17, 18], "\u57ce": [15, 17], "\u5e02": [15, 17], "\u867d": [15, 17], "\u7136": [15, 17], "\u4e5f": [15, 17, 18], "\u5904": [15, 17], "\u4e8e": [15, 17], "\u8c03": [15, 17], "\u6574": [15, 17], "\u4e2d": [15, 17, 18], "\u4f46": [15, 17, 18], "\u56e0": [15, 17], "\u4e3a": [15, 17], "\u805a": [15, 17], "\u96c6": [15, 17], "\u4e86": [15, 17, 18], "\u8fc7": [15, 17], "\u591a": [15, 17], "\u516c": [15, 17], "\u5171": [15, 17], "\u8d44": [15, 17], "\u6e90": [15, 17], "371": 15, "37": [15, 17, 20, 27], "38": [15, 17, 20, 27], "683": 15, "47": [15, 20], "651": [15, 30], "654": 15, "659": 15, "752": 15, "321": 15, "887": 15, "340": 15, "370": 15, "\u751a\u81f3": [15, 18], "\u51fa\u73b0": [15, 18], "\u4ea4\u6613": [15, 18], "\u51e0\u4e4e": [15, 18], "\u505c\u6b62": 15, "\u60c5\u51b5": [15, 18], "\u4e00\u4e8c": [15, 18], "\u57ce\u5e02": [15, 18], "\u867d\u7136": [15, 18], "\u5904\u4e8e": [15, 18], "\u8c03\u6574": [15, 18], "\u56e0\u4e3a": [15, 18], "\u805a\u96c6": [15, 18], "\u8fc7\u591a": [15, 18], "\u516c\u5171": [15, 18], "\u8d44\u6e90": [15, 18], "372": 15, "recor": [15, 20], "highest": [15, 20], "965": 15, "966": 15, "821": 15, "822": 15, "826": 15, "916": 15, "115": [15, 20], "345": 15, "888": 15, "889": 15, "limit": [15, 17, 20, 32, 35], "memori": [15, 17, 20, 32], "upgrad": [15, 20], "pro": [15, 20], "finish": [15, 17, 18, 20, 22, 23, 27, 28, 30, 35, 36], "deploi": [15, 20], "At": [15, 20], "doe": [15, 17, 20, 30], "home": [15, 20], "checkout": [15, 20], 
"v2": [15, 20], "cmake": [15, 20, 34], "dcmake_build_typ": [15, 20, 34], "j": [15, 20], "hlg_decod": [15, 20], "four": [15, 20], "messag": [15, 20, 22, 24, 25, 34, 35, 36], "nn_model": [15, 20], "use_gpu": [15, 20], "word_tabl": [15, 20], "caution": [15, 20], "forward": [15, 20, 24], "cu": [15, 20], "int": [15, 20], "char": [15, 20], "124": [15, 20], "98": 15, "142": [15, 18, 20], "150": [15, 20], "693": [15, 27], "165": [15, 20], "nnet_output": [15, 20], "182": [15, 23], "180": [15, 20], "489": 15, "45": [15, 17, 20], "216": [15, 20, 27, 28], "mandarin": 16, "corpu": 16, "beij": 16, "shell": 16, "technologi": 16, "ltd": 16, "400": 16, "peopl": 16, "accent": 16, "area": 16, "china": 16, "invit": 16, "particip": 16, "conduct": 16, "quiet": 16, "indoor": 16, "high": 16, "fidel": 16, "microphon": 16, "downsampl": 16, "16khz": 16, "manual": 16, "accuraci": 16, "through": 16, "profession": 16, "annot": 16, "strict": 16, "inspect": 16, "free": [16, 34], "academ": 16, "moder": 16, "amount": 16, "research": 16, "field": 16, "openslr": 16, "ctc": [16, 19, 21, 25, 26, 29], "stateless": [16, 19, 22, 34, 35, 36], "instead": [17, 35], "rnn": [17, 22, 24, 34, 35, 36], "As": [17, 20], "head": [17, 32], "dim": [17, 22, 35], "layer": [17, 22, 32, 34, 35, 36], "feedforward": [17, 22, 35], "embed": [17, 22, 34, 35, 36], "conv1d": [17, 22, 34, 35, 36], "kernel": 17, "left": [17, 35, 36], "context": [17, 22, 32, 34, 35, 36], "nn": [17, 22, 24, 25, 34, 35, 36], "tanh": 17, "linear": 17, "borrow": 17, "ieeexplor": 17, "ieee": 17, "stamp": 17, "jsp": 17, "arnumb": 17, "9054419": 17, "predict": [17, 22, 34, 35, 36], "modif": 17, "right": [17, 32, 35], "charact": 17, "unit": 17, "vocabulari": 17, "87939824": 17, "88": 17, "optimized_transduc": 17, "extra": [17, 32, 35], "technqiu": 17, "propos": [17, 32, 36], "improv": 17, "end": [17, 22, 24, 25, 30, 34, 35, 36], "furthermor": 17, "maximum": 17, "emit": 17, "per": [17, 22, 35, 36], "frame": [17, 22, 24, 35, 36], "simplifi": [17, 32], 
"significantli": 17, "degrad": 17, "exactli": 17, "benchmark": 17, "unprun": 17, "advantag": 17, "minim": 17, "pruned_transducer_stateless": [17, 22, 32, 35], "altern": 17, "though": 17, "transducer_stateless_modifi": 17, "option": [17, 23, 27, 28, 30], "pr": 17, "gb": 17, "ram": 17, "small": [17, 27, 28, 30], "tri": 17, "prob": [17, 34], "appli": [17, 32], "configur": [17, 23, 27, 28, 30], "c": [17, 18, 22, 24, 25, 30, 34, 35, 36], "lagz6hrcqxoigbfd5e0y3q": 17, "03t14": 17, "8477": 17, "sym": [17, 22, 35, 36], "beam_search": [17, 22, 35, 36], "decoding_method": 17, "beam_4": 17, "28": [17, 20, 23], "ensur": 17, "give": 17, "poor": 17, "531": [17, 18], "994": [17, 20], "176": [17, 20], "027": 17, "encoder_out_dim": 17, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 17, "feb": 17, "50d2281": 17, "mar": 17, "0815224919": 17, "75d558775b": 17, "mmnv8": 17, "72": [17, 20], "878": [17, 28], "257": [17, 27, 28], "880": 17, "267": [17, 27, 28], "891": 17, "__floordiv__": 17, "length": [17, 35, 36], "x_len": 17, "163": [17, 20], "320": 17, "\u6ede": 17, "322": 17, "759": 17, "760": 17, "919": 17, "922": 17, "046": 17, "047": 17, "319": [17, 20], "214": [17, 20], "215": [17, 20, 23], "402": 17, "topk_hyp_index": 17, "topk_index": 17, "logit": 17, "583": [17, 28], "2000": 18, "lji9mwuorlow3jkdhxwk8a": 18, "13t11": 18, "4454": 18, "icefall_asr_aishell_tdnn_lstm_ctc": 18, "858": [18, 20], "389": [18, 20], "154": 18, "161": [18, 20], "536": 18, "171": [18, 20, 27, 28], "539": 18, "917": 18, "207": [18, 20], "129": 18, "\u505c\u6ede": 18, "222": [18, 20], "statelessx": [19, 21, 31, 32, 33], "zipform": [19, 21, 31, 33], "mmi": [19, 21], "blank": [19, 21], "skip": [19, 21, 22, 34, 35, 36], "ligru": [19, 26], "full": [20, 22, 24, 25, 34, 35, 36], "libri": [20, 22, 24, 25, 34, 35, 36], "960": [20, 22, 24, 25, 34, 35, 36], "subset": [20, 22, 24, 25, 34, 35, 36], "3x960": [20, 22, 24, 25, 34, 35, 36], "2880": [20, 22, 24, 25, 34, 35, 36], "lzgnetjwrxc3yghnmd4kpw": 20, "24t16": 20, "43": 
20, "4540": 20, "sentenc": 20, "piec": 20, "And": [20, 22, 24, 25, 34, 35, 36], "neither": 20, "nor": 20, "vocab": 20, "work": 20, "5000": 20, "44": [20, 27, 28], "033": 20, "538": 20, "full_libri": 20, "406": 20, "464": 20, "548": 20, "776": 20, "652": [20, 30], "109226120": 20, "714": [20, 27], "473": 20, "944": 20, "1328": 20, "54": [20, 23, 27, 28], "443": [20, 23], "2563": 20, "56": [20, 27], "494": 20, "592": 20, "331": [20, 23], "1715": 20, "52576": 20, "1424": 20, "807": 20, "506": 20, "808": [20, 27], "522": 20, "362": 20, "565": 20, "1477": 20, "106": 20, "2922": 20, "208": 20, "4295": 20, "52343": 20, "396": 20, "3584": 20, "433": 20, "680": [20, 27], "_pickl": 20, "unpicklingerror": 20, "kei": 20, "hlg_modifi": 20, "g_4_gram": [20, 23, 27, 28], "106000": [20, 23], "496": [20, 23], "875": [20, 23], "212k": 20, "267440": [20, 23], "1253": [20, 23], "535k": 20, "83": [20, 23], "77200": [20, 23], "361": [20, 23], "154k": 20, "554": 20, "260": 20, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 20, "tue": 20, "8d93169": 20, "266": [20, 23], "268": [20, 23], "601": 20, "758": 20, "025": 20, "204": 20, "425": 20, "broffel": 20, "osom": 20, "427": 20, "723": 20, "775": 20, "881": 20, "352": 20, "234": 20, "384": 20, "whole": [20, 23, 27, 28, 35, 36], "ngram": [20, 23, 27, 28], "857": 20, "979": 20, "980": 20, "055": 20, "117": 20, "051": 20, "363": 20, "959": [20, 28], "546": 20, "598": 20, "599": [20, 23], "833": 20, "834": 20, "915": 20, "076": 20, "110": 20, "397": 20, "999": [20, 22, 35, 36], "concaten": 20, "bucket": 20, "sampler": 20, "1000": 20, "ctc_decod": 20, "ngram_lm_rescor": 20, "attention_rescor": 20, "kind": [20, 22, 24, 25, 34, 35, 36], "316": 20, "118": 20, "58": 20, "221": 20, "125": [20, 30], "136": 20, "144": 20, "159": [20, 30], "543": 20, "174": 20, "topo": 20, "547": 20, "729": 20, "111": 20, "702": 20, "703": 20, "545": 20, "122": 20, "280": 20, "135": [20, 30], "153": [20, 30], "945": 20, "475": 20, "191": [20, 27, 28], "398": 20, "199": 
[20, 23], "515": 20, "205": 20, "w": [20, 27, 28], "deseri": 20, "441": 20, "fsaclass": 20, "loadfsa": 20, "const": 20, "string": 20, "c10": 20, "ignor": 20, "attribut": 20, "dummi": 20, "589": 20, "attention_scal": 20, "656": 20, "162": 20, "169": [20, 27, 28], "188": 20, "624": 20, "519": [20, 28], "632": 20, "645": [20, 30], "243": 20, "970": 20, "303": 20, "179": 20, "suitabl": [22, 34, 35, 36], "pruned_transducer_stateless2": [22, 32, 35], "pruned_transducer_stateless4": [22, 32, 35], "pruned_transducer_stateless5": [22, 32, 35], "scroll": [22, 24, 25, 34, 35, 36], "scratch": [22, 24, 25, 34, 35, 36], "paper": [22, 34, 35, 36], "arxiv": [22, 34, 35, 36], "ab": [22, 34, 35, 36], "2206": [22, 34, 35, 36], "13236": [22, 34, 35, 36], "rework": [22, 32, 35], "daniel": [22, 35, 36], "joint": [22, 34, 35, 36], "contrari": [22, 34, 35, 36], "convent": [22, 34, 35, 36], "That": [22, 34, 35, 36], "recurr": [22, 34, 35, 36], "fp16": [22, 24, 25, 34, 35, 36], "half": [22, 35, 36], "2x": [22, 35, 36], "dimens": [22, 35, 36], "littl": [22, 35], "allow": [22, 35], "436000": [22, 24, 25, 34, 35, 36], "438000": [22, 24, 25, 34, 35, 36], "qogspbgsr8kzcrmmie9jgw": 22, "20t15": [22, 34, 35], "4468": [22, 34, 35], "210171": [22, 34, 35], "access": [22, 24, 25, 34, 35, 36], "6008": [22, 24, 25, 34, 35, 36], "localhost": [22, 24, 25, 34, 35, 36], "expos": [22, 24, 25, 34, 35, 36], "proxi": [22, 24, 25, 34, 35, 36], "bind_al": [22, 24, 25, 34, 35, 36], "suggest": [22, 24, 25, 34, 35, 36], "both": [22, 24, 25, 34, 35, 36], "lowest": [22, 24, 25, 34, 35, 36], "fast_beam_search": [22, 24, 34, 35, 36], "474000": [22, 34, 35, 36], "largest": [22, 35, 36], "posterior": [22, 24, 35, 36], "algorithm": [22, 35, 36], "pdf": [22, 25, 35, 36], "1211": [22, 35, 36], "3711": [22, 35, 36], "espnet": [22, 35, 36], "net": [22, 35, 36], "beam_search_transduc": [22, 35, 36], "basicli": [22, 35, 36], "topk": [22, 35, 36], "expand": [22, 35, 36], "mode": [22, 35, 36], "being": [22, 35, 36], "hardcod": 
[22, 35, 36], "composit": [22, 35, 36], "between": [22, 35, 36], "log_prob": [22, 35, 36], "hard": [22, 32, 35, 36], "2211": [22, 35, 36], "00484": [22, 35, 36], "rnnt": [22, 35, 36], "effici": [22, 35, 36], "fast_beam_search_lg": [22, 35, 36], "trivial": [22, 35, 36], "fast_beam_search_nbest": [22, 35, 36], "random_path": [22, 35, 36], "shortest": [22, 35, 36], "fast_beam_search_nbest_lg": [22, 35, 36], "logic": [22, 35, 36], "includ": [22, 24, 25, 34, 35, 36], "But": [22, 24, 25, 34, 35, 36], "smallest": [22, 34, 35, 36], "icefall_asr_librispeech_tdnn": 23, "lstm_ctc": 23, "flac": 23, "116k": 23, "140k": 23, "343k": 23, "164k": 23, "105k": 23, "174k": 23, "pretraind": 23, "168": 23, "170": 23, "581": 23, "584": [23, 28], "209": 23, "791": 23, "245": 23, "099": 23, "methond": [23, 27, 28], "725": 23, "403": 23, "631": 23, "010": 23, "guidanc": 24, "calcul": [24, 35, 36], "bigger": 24, "threshold": 24, "simpli": 24, "discard": 24, "prevent": 24, "convolut": [24, 32, 35], "similar": [24, 35, 36], "lconv": 24, "encourag": [24, 25, 34], "stabil": [24, 25], "doesn": 24, "warm": [24, 25], "pruned_transducer_stateless7_ctc_b": 24, "xyozukpeqm62hbilud4upa": [24, 25], "ctc_guild_decode_b": 24, "pretrained_ctc": 24, "jit_pretrained_ctc": 24, "yfyeung": 24, "wechat": 25, "zipformer_mmi": 25, "worker": [25, 34], "hp": 25, "zengwei": [25, 34], "tdnn_ligru_ctc": 27, "enough": [27, 28, 30], "luomingshuang": [27, 28], "icefall_asr_timit_tdnn_ligru_ctc": 27, "pretrained_average_9_25": 27, "fdhc0_si1559": [27, 28], "felc0_si756": [27, 28], "fmgd0_si1564": [27, 28], "ffprobe": [27, 28], "show_format": [27, 28], "nistspher": [27, 28], "database_id": [27, 28], "database_vers": [27, 28], "utterance_id": [27, 28], "dhc0_si1559": [27, 28], "sample_min": [27, 28], "4176": [27, 28], "sample_max": [27, 28], "5984": [27, 28], "bitrat": [27, 28], "258": [27, 28], "audio": [27, 28], "pcm_s16le": [27, 28], "hz": [27, 28], "s16": [27, 28], "256": [27, 28], "elc0_si756": [27, 28], "1546": [27, 
28], "1989": [27, 28], "mgd0_si1564": [27, 28], "7626": [27, 28], "10573": [27, 28], "660": 27, "183": [27, 28], "695": 27, "697": 27, "210": [27, 28], "829": 27, "sil": [27, 28], "dh": [27, 28], "ih": [27, 28], "uw": [27, 28], "ah": [27, 28], "ii": [27, 28], "z": [27, 28], "aa": [27, 28], "ei": [27, 28], "dx": [27, 28], "uh": [27, 28], "ng": [27, 28], "th": [27, 28], "eh": [27, 28], "jh": [27, 28], "er": [27, 28], "ai": [27, 28], "hh": [27, 28], "aw": 27, "ae": [27, 28], "705": 27, "715": 27, "720": 27, "251": [27, 28], "348": 27, "ch": 27, "icefall_asr_timit_tdnn_lstm_ctc": 28, "pretrained_average_16_25": 28, "816": 28, "827": 28, "387": 28, "unk": 28, "739": 28, "971": 28, "977": 28, "978": 28, "981": 28, "ow": 28, "ykubhb5wrmosxykid1z9eg": 30, "23t23": 30, "sinc": [30, 34], "icefall_asr_yesno_tdnn": 30, "l_disambig": 30, "lexicon_disambig": 30, "arpa": 30, "0_0_0_1_0_0_0_1": 30, "0_0_1_0_0_0_1_0": 30, "0_0_1_0_0_1_1_1": 30, "0_0_1_0_1_0_0_1": 30, "0_0_1_1_0_0_0_1": 30, "0_0_1_1_0_1_1_0": 30, "0_0_1_1_1_0_0_0": 30, "0_0_1_1_1_1_0_0": 30, "0_1_0_0_0_1_0_0": 30, "0_1_0_0_1_0_1_0": 30, "0_1_0_1_0_0_0_0": 30, "0_1_0_1_1_1_0_0": 30, "0_1_1_0_0_1_1_1": 30, "0_1_1_1_0_0_1_0": 30, "0_1_1_1_1_0_1_0": 30, "1_0_0_0_0_0_0_0": 30, "1_0_0_0_0_0_1_1": 30, "1_0_0_1_0_1_1_1": 30, "1_0_1_1_0_1_1_1": 30, "1_0_1_1_1_1_0_1": 30, "1_1_0_0_0_1_1_1": 30, "1_1_0_0_1_0_1_1": 30, "1_1_0_1_0_1_0_0": 30, "1_1_0_1_1_0_0_1": 30, "1_1_0_1_1_1_1_0": 30, "1_1_1_0_0_1_0_1": 30, "1_1_1_0_1_0_1_0": 30, "1_1_1_1_0_0_1_0": 30, "1_1_1_1_1_0_0_0": 30, "1_1_1_1_1_1_1_1": 30, "54080": 30, "507": 30, "108k": 30, "No": 30, "ye": 30, "hebrew": 30, "NO": 30, "621": 30, "119": 30, "127": 30, "650": 30, "139": 30, "143": 30, "198": 30, "181": 30, "186": 30, "187": 30, "287": 30, "correctli": 30, "simplest": 30, "former": 32, "idea": 32, "achiev": 32, "mask": [32, 35, 36], "wenet": 32, "did": 32, "argument": 32, "adapt": 32, "complic": 32, "techniqu": 32, "bank": 32, "compon": 32, "memor": 32, "histori": 32, 
"introduc": 32, "variant": 32, "pruned_stateless_emformer_rnnt2": 32, "conv_emformer_transducer_stateless": 32, "convemform": 32, "ourself": 32, "mechan": 32, "conv_emformer_transducer_stateless2": 32, "onlin": 34, "lstm_transducer_stateless": 34, "architectur": 34, "lower": 34, "prepare_giga_speech": 34, "cj2vtpiwqhkn9q1tx6ptpg": 34, "hidden": 34, "1024": 34, "pnnx": 34, "submodul": 34, "updat": 34, "recurs": 34, "init": 34, "dncnn_python": 34, "dncnn_build_benchmark": 34, "off": 34, "dncnn_build_exampl": 34, "dncnn_build_tool": 34, "j4": 34, "pwd": 34, "quantiz": 34, "abl": 34, "ncnn2int8": 34, "third": 34, "param": 34, "extern": 34, "stateless2": 34, "compar": 35, "dynam": [35, 36], "chunk": [35, 36], "causal": 35, "short": [35, 36], "2012": 35, "05481": 35, "flag": 35, "indic": [35, 36], "whether": 35, "must": 35, "sequenc": [35, 36], "uniformli": [35, 36], "most": [35, 36], "seen": [35, 36], "97vkxf80ru61cnp2alwzzg": 35, "streaming_decod": [35, 36], "acoust": [35, 36], "wise": [35, 36], "subsampl": [35, 36], "equal": [35, 36], "where": 35, "parallel": [35, 36], "bath": [35, 36], "parallelli": [35, 36], "seem": 35, "benefit": 35, "might": [35, 36], "mismatch": 35, "yourself": [35, 36], "mdoel": 35, "pruned_transducer_stateless7_stream": 36, "len": 36, "320m": 36, "550": 36, "scriptmodul": 36, "jit_trace_export": 36, "jit_trace_pretrain": 36, "task": 37}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 9, 11, 12, 13], "creat": [2, 8], "recip": [2, 37], "data": [2, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "prepar": [2, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "train": [2, 5, 8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "decod": [2, 8, 9, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "pre": [2, 5, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "model": [2, 5, 9, 11, 12, 13, 14, 15, 17, 18, 20, 
22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "huggingfac": [4, 6], "space": 6, "youtub": [6, 8], "video": [6, 8], "icefal": [7, 8], "content": [7, 37], "instal": [8, 15, 17, 18, 20, 23, 27, 28], "0": 8, "pytorch": 8, "torchaudio": 8, "1": [8, 15, 17, 18, 20], "k2": 8, "2": [8, 15, 17, 18, 20], "lhots": 8, "3": [8, 15, 17, 20], "download": [8, 15, 17, 18, 20, 22, 23, 24, 25, 27, 28, 30, 34, 35, 36], "exampl": [8, 15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "virtual": 8, "environ": 8, "activ": 8, "your": 8, "4": 8, "5": 8, "test": 8, "export": [9, 10, 11, 12, 13, 14, 22, 24, 25, 34, 35, 36], "state_dict": [9, 22, 24, 25, 34, 35, 36], "when": [9, 11, 12, 13], "us": [9, 11, 12, 13, 22, 24, 25, 34, 35, 36], "run": 9, "py": 9, "ncnn": [10, 34], "onnx": 11, "torch": [12, 13, 22, 24, 25, 34, 35, 36], "jit": [12, 13, 22, 24, 25, 34, 35, 36], "script": [12, 22, 24, 25, 35, 36], "trace": [13, 34, 36], "conform": [15, 20, 32], "ctc": [15, 18, 20, 23, 24, 27, 28, 30], "configur": [15, 18, 20, 22, 24, 25, 34, 35, 36], "option": [15, 18, 20, 22, 24, 25, 34, 35, 36], "log": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "usag": [15, 17, 18, 20, 22, 24, 25, 34, 35, 36], "case": [15, 17, 18, 20], "kaldifeat": [15, 17, 18, 20, 23, 27, 28, 30], "hlg": [15, 18, 20], "attent": [15, 20], "rescor": [15, 20], "colab": [15, 17, 18, 20, 23, 27, 28, 30], "notebook": [15, 17, 18, 20, 23, 27, 28, 30], "deploy": [15, 20], "c": [15, 20], "aishel": 16, "stateless": 17, "transduc": [17, 22, 34, 35, 36], "The": 17, "loss": 17, "todo": 17, "greedi": 17, "search": 17, "beam": 17, "modifi": 17, "tdnn": [18, 23, 27, 28, 30], "lstm": [18, 23, 28, 34], "non": 19, "stream": [19, 31, 32, 35, 36], "asr": [19, 31], "lm": 20, "comput": 20, "wer": 20, "n": 20, "gram": 20, "librispeech": [21, 33], "prune": [22, 35], "statelessx": [22, 35], "pretrain": [22, 24, 25, 34, 35, 36], "deploi": [22, 35, 36], "sherpa": [22, 35, 36], "infer": [23, 27, 28, 30], "zipform": [24, 25, 36], "blank": 24, "skip": 24, "mmi": 25, "timit": 26, 
"ligru": 27, "yesno": 29, "introduct": 32, "emform": 32, "which": 34, "simul": [35, 36], "real": [35, 36], "tabl": 37}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [17, "data-preparation"]], "Training": [[2, "training"], [8, "training"], [15, "training"], [17, "training"], [18, "training"], [20, "training"], [22, "training"], [23, "training"], [24, "training"], [25, "training"], [27, "training"], [28, "training"], [30, "training"], [34, "training"], [35, "training"], [36, "training"]], "Decoding": [[2, "decoding"], [8, "decoding"], [15, "decoding"], [17, "decoding"], [18, "decoding"], [20, "decoding"], [22, "decoding"], [23, "decoding"], [24, "decoding"], [25, "decoding"], [27, "decoding"], [28, "decoding"], [30, "decoding"], [34, "decoding"], [35, "decoding"], [36, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "Huggingface": [[4, "huggingface"]], "Pre-trained models": [[5, "pre-trained-models"]], "Huggingface spaces": [[6, "huggingface-spaces"]], "YouTube Video": [[6, "youtube-video"], [8, "youtube-video"]], "Icefall": [[7, "icefall"]], "Contents:": [[7, null]], "Installation": [[8, "installation"]], "(0) Install PyTorch and torchaudio": [[8, "install-pytorch-and-torchaudio"]], "(1) Install k2": [[8, "install-k2"]], "(2) Install lhotse": [[8, "install-lhotse"]], "(3) Download icefall": [[8, "download-icefall"]], "Installation example": [[8, "installation-example"]], "(1) Create a virtual 
environment": [[8, "create-a-virtual-environment"]], "(2) Activate your virtual environment": [[8, "activate-your-virtual-environment"]], "(3) Install k2": [[8, "id1"]], "(4) Install lhotse": [[8, "id2"]], "(5) Download icefall": [[8, "id3"]], "Test Your Installation": [[8, "test-your-installation"]], "Data preparation": [[8, "data-preparation"], [15, "data-preparation"], [18, "data-preparation"], [20, "data-preparation"], [22, "data-preparation"], [23, "data-preparation"], [24, "data-preparation"], [25, "data-preparation"], [27, "data-preparation"], [28, "data-preparation"], [30, "data-preparation"], [34, "data-preparation"], [35, "data-preparation"], [36, "data-preparation"]], "Export model.state_dict()": [[9, "export-model-state-dict"], [22, "export-model-state-dict"], [24, "export-model-state-dict"], [25, "export-model-state-dict"], [34, "export-model-state-dict"], [35, "export-model-state-dict"], [36, "export-model-state-dict"]], "When to use it": [[9, "when-to-use-it"], [11, "when-to-use-it"], [12, "when-to-use-it"], [13, "when-to-use-it"]], "How to export": [[9, "how-to-export"], [11, "how-to-export"], [12, "how-to-export"], [13, "how-to-export"]], "How to use the exported model": [[9, "how-to-use-the-exported-model"], [11, "how-to-use-the-exported-model"], [12, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[9, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[10, "export-to-ncnn"]], "Export to ONNX": [[11, "export-to-onnx"]], "Export model with torch.jit.script()": [[12, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[13, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[13, "how-to-use-the-exported-models"]], "Model export": [[14, "model-export"]], "Conformer CTC": [[15, "conformer-ctc"], [20, "conformer-ctc"]], "Configurable options": [[15, "configurable-options"], [18, "configurable-options"], [20, "configurable-options"], [22, 
"configurable-options"], [24, "configurable-options"], [25, "configurable-options"], [34, "configurable-options"], [35, "configurable-options"], [36, "configurable-options"]], "Pre-configured options": [[15, "pre-configured-options"], [18, "pre-configured-options"], [20, "pre-configured-options"], [22, "pre-configured-options"], [24, "pre-configured-options"], [25, "pre-configured-options"], [34, "pre-configured-options"], [35, "pre-configured-options"], [36, "pre-configured-options"]], "Training logs": [[15, "training-logs"], [17, "training-logs"], [18, "training-logs"], [20, "training-logs"], [22, "training-logs"], [24, "training-logs"], [25, "training-logs"], [34, "training-logs"], [35, "training-logs"], [36, "training-logs"]], "Usage examples": [[15, "usage-examples"], [17, "usage-examples"], [18, "usage-examples"], [20, "usage-examples"]], "Case 1": [[15, "case-1"], [17, "case-1"], [18, "case-1"], [20, "case-1"]], "Case 2": [[15, "case-2"], [17, "case-2"], [18, "case-2"], [20, "case-2"]], "Case 3": [[15, "case-3"], [17, "case-3"], [20, "case-3"]], "Pre-trained Model": [[15, "pre-trained-model"], [17, "pre-trained-model"], [18, "pre-trained-model"], [20, "pre-trained-model"], [23, "pre-trained-model"], [27, "pre-trained-model"], [28, "pre-trained-model"], [30, "pre-trained-model"]], "Install kaldifeat": [[15, "install-kaldifeat"], [17, "install-kaldifeat"], [18, "install-kaldifeat"], [20, "install-kaldifeat"], [23, "install-kaldifeat"], [27, "install-kaldifeat"], [28, "install-kaldifeat"]], "Download the pre-trained model": [[15, "download-the-pre-trained-model"], [17, "download-the-pre-trained-model"], [18, "download-the-pre-trained-model"], [20, "download-the-pre-trained-model"], [23, "download-the-pre-trained-model"], [27, "download-the-pre-trained-model"], [28, "download-the-pre-trained-model"], [30, "download-the-pre-trained-model"]], "Usage": [[15, "usage"], [17, "usage"], [18, "usage"], [20, "usage"]], "CTC decoding": [[15, "ctc-decoding"], [20, 
"ctc-decoding"], [20, "id2"]], "HLG decoding": [[15, "hlg-decoding"], [15, "id2"], [18, "hlg-decoding"], [20, "hlg-decoding"], [20, "id3"]], "HLG decoding + attention decoder rescoring": [[15, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[15, "colab-notebook"], [17, "colab-notebook"], [18, "colab-notebook"], [20, "colab-notebook"], [23, "colab-notebook"], [27, "colab-notebook"], [28, "colab-notebook"], [30, "colab-notebook"]], "Deployment with C++": [[15, "deployment-with-c"], [20, "deployment-with-c"]], "aishell": [[16, "aishell"]], "Stateless Transducer": [[17, "stateless-transducer"]], "The Model": [[17, "the-model"]], "The Loss": [[17, "the-loss"]], "Todo": [[17, "id1"]], "Greedy search": [[17, "greedy-search"]], "Beam search": [[17, "beam-search"]], "Modified Beam search": [[17, "modified-beam-search"]], "TDNN-LSTM CTC": [[18, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[19, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[20, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[20, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[20, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[20, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[20, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "LibriSpeech": [[21, "librispeech"], [33, "librispeech"]], "Pruned transducer statelessX": [[22, "pruned-transducer-statelessx"], [35, "pruned-transducer-statelessx"]], "Usage example": [[22, "usage-example"], [24, "usage-example"], [25, "usage-example"], [34, "usage-example"], [35, "usage-example"], [36, "usage-example"]], "Export Model": [[22, "export-model"], [35, "export-model"], [36, "export-model"]], "Export model using torch.jit.script()": [[22, "export-model-using-torch-jit-script"], [24, "export-model-using-torch-jit-script"], [25, 
"export-model-using-torch-jit-script"], [35, "export-model-using-torch-jit-script"], [36, "export-model-using-torch-jit-script"]], "Download pretrained models": [[22, "download-pretrained-models"], [24, "download-pretrained-models"], [25, "download-pretrained-models"], [34, "download-pretrained-models"], [35, "download-pretrained-models"], [36, "download-pretrained-models"]], "Deploy with Sherpa": [[22, "deploy-with-sherpa"], [35, "deploy-with-sherpa"], [36, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[23, "tdnn-lstm-ctc"], [28, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[23, "inference-with-a-pre-trained-model"], [27, "inference-with-a-pre-trained-model"], [28, "inference-with-a-pre-trained-model"], [30, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[24, "zipformer-ctc-blank-skip"]], "Export models": [[24, "export-models"], [25, "export-models"], [34, "export-models"]], "Zipformer MMI": [[25, "zipformer-mmi"]], "TIMIT": [[26, "timit"]], "TDNN-LiGRU-CTC": [[27, "tdnn-ligru-ctc"]], "YesNo": [[29, "yesno"]], "TDNN-CTC": [[30, "tdnn-ctc"]], "Download kaldifeat": [[30, "download-kaldifeat"]], "Streaming ASR": [[31, "streaming-asr"]], "Introduction": [[32, "introduction"]], "Streaming Conformer": [[32, "streaming-conformer"]], "Streaming Emformer": [[32, "streaming-emformer"]], "LSTM Transducer": [[34, "lstm-transducer"]], "Which model to use": [[34, "which-model-to-use"]], "Export model using torch.jit.trace()": [[34, "export-model-using-torch-jit-trace"], [36, "export-model-using-torch-jit-trace"]], "Export model for ncnn": [[34, "export-model-for-ncnn"]], "Simulate streaming decoding": [[35, "simulate-streaming-decoding"], [36, "simulate-streaming-decoding"]], "Real streaming decoding": [[35, "real-streaming-decoding"], [36, "real-streaming-decoding"]], "Zipformer Transducer": [[36, "zipformer-transducer"]], "Recipes": [[37, "recipes"]], "Table of Contents": [[37, null]]}, "indexentries": {}})
\ No newline at end of file
+Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "faqs", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "faqs.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-onnx.rst", 
"model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Frequently Asked Questions (FAQs)", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", 
"YesNo", "TDNN-CTC", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37, 38], "us": [0, 1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 18, 19, 21, 24, 28, 29, 31, 33], "tool": [0, 4, 35], "make": [0, 1, 3, 16, 18, 21, 33, 35], "consist": [0, 18, 23, 35, 36, 37], "possibl": [0, 2, 3, 9, 16, 21], "black": 0, "format": [0, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "flake8": 0, "check": [0, 21], "qualiti": [0, 17], "isort": 0, "sort": [0, 9], "import": [0, 4, 36, 37], "The": [0, 1, 2, 4, 7, 9, 10, 11, 16, 17, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "version": [0, 8, 9, 10, 16, 18, 19, 21, 23, 24, 28, 29, 35, 36], "abov": [0, 9, 10, 16, 17, 18, 19, 21, 23, 25, 26, 31, 33, 35, 36, 37], "ar": [0, 1, 3, 9, 10, 12, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37, 38], "22": [0, 21, 28, 29, 31], "3": [0, 4, 8, 10, 19, 23, 24, 25, 26, 31, 35, 36, 37], "0": [0, 1, 4, 8, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "5": [0, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "4": [0, 4, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "10": [0, 4, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "1": [0, 4, 8, 10, 12, 13, 14, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "after": [0, 1, 7, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "run": [0, 2, 4, 7, 9, 12, 15, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "command": [0, 1, 4, 9, 10, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "git": [0, 9, 10, 16, 18, 19, 21, 24, 28, 29, 31, 35], "clone": [0, 9, 10, 16, 18, 19, 21, 24, 28, 29, 31, 35], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "github": [0, 2, 6, 9, 10, 11, 12, 13, 14, 16, 18, 19, 
21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "com": [0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "k2": [0, 2, 6, 7, 8, 10, 11, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 35, 36, 37], "fsa": [0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 18, 21, 23, 25, 26, 35, 36, 37], "icefal": [0, 2, 3, 4, 6, 7, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37, 38], "cd": [0, 1, 2, 9, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "pip": [0, 1, 4, 9, 18, 35], "instal": [0, 1, 4, 5, 7, 8, 10, 23, 25, 26, 31, 35, 36, 37], "pre": [0, 3, 5, 7, 8, 9], "commit": 0, "whenev": 0, "you": [0, 1, 2, 6, 7, 9, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "automat": [0, 7], "hook": 0, "invok": 0, "fail": [0, 9], "If": [0, 2, 7, 9, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "ani": [0, 9, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "your": [0, 1, 2, 5, 7, 8, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "wa": [0, 9, 10, 21, 24], "success": [0, 9], "pleas": [0, 1, 2, 7, 9, 11, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "fix": [0, 4, 9, 21], "issu": [0, 4, 9, 21, 36, 37], "report": [0, 4, 9], "some": [0, 1, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "i": [0, 1, 2, 4, 7, 9, 10, 11, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "e": [0, 2, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "modifi": [0, 16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "file": [0, 2, 7, 8, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "place": [0, 9, 10, 18, 21, 24], "so": [0, 7, 8, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "statu": 0, "failur": 0, "see": [0, 1, 7, 9, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "which": [0, 2, 7, 10, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 36, 37], "ha": [0, 2, 8, 
11, 12, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 33, 35, 36, 37], "been": [0, 11, 12, 18], "befor": [0, 1, 10, 13, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "further": 0, "chang": [0, 4, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "all": [0, 6, 7, 10, 13, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "again": [0, 31], "should": [0, 2, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "succe": 0, "thi": [0, 2, 3, 4, 5, 9, 10, 12, 13, 14, 15, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37, 38], "time": [0, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "succeed": 0, "want": [0, 9, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "can": [0, 1, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "do": [0, 2, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "Or": 0, "without": [0, 5, 7, 16, 21, 35], "your_changed_fil": 0, "py": [0, 2, 4, 9, 12, 13, 14, 15, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 6, 7, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "prepar": [1, 3, 10], "environ": [1, 16, 17, 18, 19, 21, 23, 24, 28, 29, 31, 35, 36, 37], "doc": [1, 10], "r": [1, 9, 28, 29], "requir": [1, 9, 36, 37], "txt": [1, 9, 16, 18, 19, 21, 24, 28, 29, 31], "set": [1, 9, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "up": [1, 9, 10, 16, 19, 21, 23, 24, 25, 26, 36, 37], "readi": [1, 16, 21], "refer": [1, 2, 9, 10, 11, 13, 14, 16, 18, 19, 21, 23, 24, 25, 28, 29, 31, 33, 36, 37], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 10, 16, 18, 21, 35], "local": [1, 9, 23, 25, 26, 35, 36, 37], "preview": 1, "what": [1, 2, 9, 18, 33], "look": [1, 2, 6, 9, 16, 18, 19, 21], "like": [1, 2, 7, 9, 16, 18, 19, 21, 23, 25, 26, 31, 33, 35, 36], "publish": [1, 10, 17], "html": [1, 2, 4, 9, 13, 14, 23, 35, 36, 37], "gener": [1, 10, 12, 13, 14, 16, 18, 19, 21, 
23, 25, 26, 35, 36, 37], "view": [1, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "follow": [1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "python3": [1, 4, 9], "m": [1, 18, 23, 25, 26, 28, 29, 35, 36, 37], "server": [1, 7, 9, 35], "It": [1, 2, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "print": [1, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "serv": [1, 23, 25, 26, 35, 36, 37], "port": [1, 23, 25, 26, 35, 36, 37], "8000": [1, 31], "open": [1, 8, 10, 17, 18, 21], "browser": [1, 5, 7, 23, 25, 26, 35, 36, 37], "go": [1, 16, 18, 21, 23, 25, 26, 35, 36, 37], "read": [2, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "code": [2, 3, 4, 8, 16, 21, 23, 24, 28, 29, 31, 36, 37], "style": [2, 3, 8], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 9, 10, 13, 14, 16, 18, 21, 23, 25, 26, 35, 36, 37], "recommend": [2, 9, 16, 18, 19, 21, 23, 36, 37], "test": [2, 8, 10, 11, 12, 16, 18, 19, 21, 24, 25, 28, 29], "valid": [2, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "dataset": [2, 4, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "lhots": [2, 8, 10, 16, 18, 21], "readthedoc": [2, 9], "io": [2, 9, 13, 14, 23, 35, 36, 37], "en": [2, 9], "latest": [2, 7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "index": [2, 9, 13, 14, 35, 36, 37], "yesno": [2, 4, 8, 9, 20, 31, 38], "veri": [2, 3, 18, 28, 29, 31, 36, 37], "good": 2, "exampl": [2, 7, 8, 10, 12, 13, 14, 24, 28, 29, 31], "speech": [2, 7, 8, 9, 11, 12, 17, 18, 31, 38], "pull": [2, 16, 18, 21, 33], "380": [2, 29], "show": [2, 7, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "add": [2, 16, 18, 19, 36, 38], "new": [2, 3, 7, 9, 16, 17, 18, 19, 21, 23, 24, 25, 26, 31, 35, 36, 37], "suppos": [2, 36, 37], "would": [2, 9, 10, 21, 24, 36, 37], "name": [2, 10, 16, 18, 23, 25, 26, 36, 37], "foo": [2, 12, 14, 16, 21, 23, 25, 26, 35, 36, 37], "eg": [2, 4, 6, 9, 10, 12, 13, 
14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "mkdir": [2, 16, 18, 19, 21, 24, 28, 29, 31, 35], "p": [2, 9, 18, 28, 29, 35], "asr": [2, 4, 6, 8, 9, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37, 38], "touch": 2, "sh": [2, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "chmod": 2, "x": [2, 33], "simpl": [2, 18], "own": [2, 23, 36, 37], "otherwis": [2, 16, 18, 21, 23, 25, 26, 35, 36, 37], "librispeech": [2, 6, 8, 10, 12, 13, 14, 20, 21, 23, 24, 25, 26, 32, 33, 35, 36, 37, 38], "assum": [2, 9, 10, 16, 18, 19, 21, 23, 24, 28, 29, 31, 35, 36, 37], "fanci": 2, "call": [2, 4], "bar": [2, 12, 14, 16, 21, 23, 25, 26, 35, 36, 37], "organ": 2, "wai": [2, 3, 15, 23, 25, 26, 33, 35, 36, 37], "readm": [2, 16, 18, 19, 21, 24, 28, 29, 31], "md": [2, 6, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "asr_datamodul": [2, 4, 9], "pretrain": [2, 10, 12, 14, 16, 18, 19, 21, 24, 28, 29, 31], "For": [2, 6, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "instanc": [2, 6, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "tdnn": [2, 4, 9, 17, 20, 22, 27, 30], "its": [2, 10, 14, 18, 25], "directori": [2, 8, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "structur": 2, "descript": [2, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "contain": [2, 8, 10, 11, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37, 38], "inform": [2, 10, 16, 18, 19, 21, 23, 24, 25, 28, 29, 31, 33, 35, 36, 37], "g": [2, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "wer": [2, 9, 10, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "etc": [2, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "provid": [2, 7, 9, 10, 11, 12, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37, 38], "pytorch": [2, 4, 8, 18], "dataload": [2, 9], "take": [2, 10, 23, 31, 36, 37], "input": [2, 10, 16, 18, 19, 21, 24, 28, 29, 31, 33], "checkpoint": [2, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 
31, 35, 36, 37], "save": [2, 9, 10, 13, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "dure": [2, 7, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "stage": [2, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "": [2, 9, 10, 13, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "definit": 2, "neural": [2, 16, 21], "network": [2, 16, 18, 21, 23, 25, 26, 35, 36, 37], "script": [2, 8, 9, 14, 15, 16, 18, 19, 21, 24, 28, 29, 31, 35], "infer": [2, 10, 12], "tdnn_lstm_ctc": [2, 19, 24, 29], "conformer_ctc": [2, 16, 21], "get": [2, 7, 9, 16, 18, 19, 21, 23, 24, 25, 26, 31, 35, 36, 37], "feel": [2, 35], "result": [2, 6, 7, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "everi": [2, 10, 23, 25, 26, 35, 36, 37], "kept": [2, 23, 36, 37], "self": [2, 11, 33], "toler": 2, "duplic": 2, "among": [2, 9], "differ": [2, 9, 16, 17, 21, 23, 33, 35, 36, 37], "invoc": 2, "help": [2, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "blob": [2, 6, 10, 14, 23, 25, 26, 35, 36, 37], "master": [2, 6, 10, 12, 13, 14, 18, 23, 25, 26, 35, 36, 37], "transform": [2, 16, 21, 35], "conform": [2, 12, 13, 17, 18, 20, 22, 23, 25, 35, 36, 37], "base": [2, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "lstm": [2, 11, 14, 17, 20, 22, 27, 32, 34], "attent": [2, 18, 19, 33, 36, 37], "lm": [2, 9, 18, 23, 24, 28, 29, 31, 36, 37], "rescor": [2, 19, 24, 26, 28, 29, 31], "demonstr": [2, 5, 7, 10], "consid": 2, "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "mani": [3, 36, 37], "two": [3, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "them": [3, 5, 6, 7, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "To": [3, 7, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "document": [3, 8, 10, 26], "repositori": 3, "recip": [3, 6, 8, 9, 10, 16, 18, 19, 21, 23, 24, 28, 29, 31, 33, 35, 36, 37], "In": [3, 4, 7, 9, 10, 12, 13, 14, 15, 16, 18, 19, 21, 24, 28, 29, 31, 33], "page": [3, 
7, 13, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37, 38], "describ": [3, 5, 10, 12, 13, 14, 15, 16, 18, 19, 21, 23, 24, 28, 29, 36, 37], "how": [3, 5, 7, 8, 9, 15, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "creat": [3, 8, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36], "data": [3, 10, 12, 13, 14, 17], "train": [3, 4, 5, 7, 8, 10, 13, 14, 33], "decod": [3, 4, 7, 12, 14, 15], "model": [3, 5, 7, 8, 9, 11, 33], "section": [4, 5, 9, 10, 12, 13, 14, 15, 16, 21], "collect": [4, 9], "user": 4, "post": 4, "correspond": [4, 6, 7], "solut": 4, "One": 4, "torch": [4, 8, 9, 10, 15, 16, 18, 21], "torchaudio": [4, 8, 33], "cu111": 4, "torchvis": 4, "11": [4, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "f": [4, 9, 28, 29], "download": [4, 7, 8, 17], "org": [4, 9, 17, 18, 23, 35, 36, 37], "whl": [4, 9], "torch_stabl": 4, "throw": 4, "error": [4, 9, 21], "when": [4, 7, 15, 18, 21, 23, 25, 26, 36, 37], "specifi": [4, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "cuda": [4, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 35, 36, 37], "while": [4, 9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "That": [4, 23, 35, 36, 37], "cu11": 4, "therefor": 4, "correct": 4, "log": [4, 9, 24, 28, 29, 31], "traceback": 4, "most": [4, 36, 37], "recent": 4, "last": 4, "line": [4, 9, 23, 36, 37], "14": [4, 9, 10, 12, 13, 16, 21, 23, 24, 25, 28, 35, 36, 37], "from": [4, 5, 7, 9, 10, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "yesnoasrdatamodul": 4, "home": [4, 16, 21], "xxx": [4, 10], "next": [4, 7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "gen": [4, 7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "kaldi": [4, 7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "34": [4, 9], "datamodul": 4, "__init__": [4, 10, 16, 18, 21], "23": [4, 9, 16, 18, 19, 21, 28, 29, 31], "util": [4, 9, 21], "add_eo": 4, "add_so": 4, "get_text": 4, "39": [4, 9, 18, 21, 24, 28], "tensorboard": [4, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], 
"summarywrit": 4, "miniconda3": 4, "env": 4, "yyi": 4, "lib": [4, 9], "8": [4, 9, 10, 16, 18, 21, 23, 24, 25, 26, 31, 35, 36, 37], "site": [4, 9], "packag": [4, 9], "loosevers": 4, "uninstal": 4, "setuptool": [4, 9], "58": [4, 21], "find": [5, 6, 7, 9, 10, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "also": [5, 6, 9, 10, 11, 12, 14, 16, 18, 19, 21, 23, 25, 26, 31, 33, 35, 36, 37], "try": [5, 7, 23, 25, 26, 35, 36, 37], "within": [5, 7], "anyth": [5, 7], "space": [5, 8], "youtub": [5, 8, 21, 23, 24, 25, 26, 35, 36, 37], "video": [5, 8, 21, 23, 24, 25, 26, 35, 36, 37], "upload": [6, 7, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "huggingfac": [6, 8, 10, 16, 18, 19, 21, 24, 25, 26, 28, 29, 31, 35], "co": [6, 7, 10, 16, 17, 18, 19, 21, 24, 25, 26, 28, 29, 31, 35], "visit": [6, 7, 23, 25, 26, 35, 36, 37], "link": [6, 9, 10, 11, 23, 25, 26, 35, 36, 37], "search": [6, 7], "specif": [6, 18], "aishel": [6, 8, 16, 18, 19, 20, 38], "gigaspeech": [6, 13, 35], "wenetspeech": [6, 13], "integr": 7, "framework": [7, 12, 23, 36], "sherpa": [7, 11, 12, 13, 14, 35], "need": [7, 9, 10, 11, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "window": [7, 11, 12, 35], "maco": [7, 11, 12, 35], "linux": [7, 11, 12, 35], "even": [7, 9], "ipad": 7, "phone": 7, "start": [7, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "address": [7, 10, 18, 23, 26, 35, 36, 37], "recognit": [7, 8, 11, 12, 17, 18, 31, 38], "screenshot": [7, 16, 18, 19, 21, 23, 31, 35, 36], "select": [7, 23, 24, 28, 29, 31, 35, 36, 37], "languag": [7, 16, 18, 19], "current": [7, 9, 18, 33, 35, 36, 37, 38], "chines": [7, 17, 18], "english": [7, 31, 35], "target": 7, "method": [7, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 35, 36, 37], "greedi": 7, "modified_beam_search": [7, 18, 23, 25, 35, 36, 37], "choos": [7, 9, 23, 25, 26, 35, 36, 37], "number": [7, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "activ": 7, "path": [7, 10, 12, 14, 16, 18, 19, 21, 
23, 25, 26, 35, 36, 37], "either": [7, 16, 18, 19, 21, 36, 37], "record": [7, 16, 17, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "click": [7, 9, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "button": 7, "submit": 7, "wait": 7, "moment": 7, "an": [7, 9, 10, 12, 13, 14, 16, 17, 18, 21, 23, 26, 31, 35, 36, 37], "bottom": [7, 23, 25, 26, 35, 36, 37], "part": [7, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "tabl": 7, "one": [7, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "subscrib": [7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "channel": [7, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "nadira": [7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "povei": [7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "www": [7, 9, 17, 21, 23, 24, 25, 26, 35, 36, 37], "uc_vaumpkminz1pnkfxan9mw": [7, 9, 21, 23, 24, 25, 26, 35, 36, 37], "2": [8, 10, 12, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "frequent": 8, "ask": 8, "question": 8, "faq": 8, "oserror": 8, "libtorch_hip": 8, "cannot": 8, "share": [8, 9], "object": [8, 9, 16, 18, 19, 23, 31, 35, 36], "attributeerror": 8, "modul": [8, 9, 25, 36], "distutil": 8, "attribut": [8, 21], "export": [8, 9, 16, 18, 19, 21, 24, 28, 29, 31], "state_dict": [8, 15, 16, 18, 19, 21, 24, 28, 29, 31], "jit": [8, 15, 21], "trace": [8, 13, 15], "onnx": [8, 10, 15], "ncnn": [8, 15], "non": [8, 12, 21, 33, 36, 38], "stream": [8, 12, 16, 21, 28, 29, 35, 38], "timit": [8, 20, 28, 29, 38], "introduct": [8, 32, 38], "contribut": 8, "depend": [9, 16, 21, 35], "step": [9, 10, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "order": [9, 16, 19, 21, 24, 28, 29], "matter": 9, "least": 9, "v1": [9, 16, 19, 21, 24, 28, 29], "9": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 31, 35, 36, 37], "alreadi": [9, 10], "don": [9, 13, 16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "t": [9, 13, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "replac": 9, "compil": [9, 16, 18, 21], "against": 9, "strongli": 9, "variabl": [9, 16, 19, 21, 23, 
25, 26, 35, 36, 37], "pythonpath": [9, 35], "point": [9, 10, 16, 19, 21, 23, 25, 26, 35, 36, 37], "folder": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "tmp": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "setup": [9, 16, 18, 19, 21, 23, 24, 28, 29, 31, 35, 36, 37], "put": [9, 25, 36], "sever": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "same": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "switch": [9, 16, 21, 26], "just": [9, 33], "about": [9, 18, 23, 26, 35, 36, 37], "virtualenv": 9, "cpython3": 9, "6": [9, 16, 18, 21, 23, 24, 28, 29, 35], "final": [9, 10, 21, 24], "64": [9, 10, 18, 36], "1540m": 9, "creator": 9, "cpython3posix": 9, "dest": 9, "ceph": [9, 10, 16, 18, 21], "fj": [9, 10, 18, 21], "fangjun": [9, 10, 18, 21], "clear": 9, "fals": [9, 10, 16, 18, 21], "no_vcs_ignor": 9, "global": 9, "seeder": 9, "fromappdata": 9, "bundl": 9, "wheel": [9, 35], "via": [9, 13, 14], "copi": [9, 33], "app_data_dir": 9, "root": 9, "v": [9, 21, 28, 29], "irtualenv": 9, "ad": [9, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "seed": 9, "21": [9, 10, 16, 18, 21, 28, 29], "57": [9, 21, 24], "36": [9, 18, 21], "bashactiv": 9, "cshellactiv": 9, "fishactiv": 9, "powershellactiv": 9, "pythonactiv": 9, "xonshactiv": 9, "sourc": [9, 10, 16, 17, 18, 21], "bin": [9, 16, 21, 35], "dev20210822": 9, "cpu": [9, 10, 13, 16, 23, 25, 26, 31, 36, 37], "torch1": 9, "nightli": 9, "2bcpu": 9, "cp38": 9, "linux_x86_64": 9, "mb": 9, "________________________________": 9, "185": [9, 16, 21, 31], "kb": [9, 28, 29], "graphviz": 9, "17": [9, 10, 16, 21, 28, 29, 35], "py3": 9, "none": [9, 16, 21], "18": [9, 16, 18, 19, 21, 23, 24, 28, 29, 35, 36, 37], "cach": 9, "manylinux1_x86_64": 9, "831": [9, 18, 29], "type": [9, 10, 16, 18, 21, 23, 25, 26, 31, 33, 35, 36, 37], "extens": 9, "typing_extens": 9, "26": [9, 18, 21, 29], "successfulli": 9, "probabl": [9, 18, 23, 25, 35, 36, 37], "req": 9, "7b1b76ge": 9, "q": 9, 
"audioread": 9, "soundfil": 9, "post1": 9, "py2": 9, "7": [9, 10, 16, 19, 21, 23, 24, 28, 29, 35, 36], "97": [9, 16], "cytoolz": 9, "manylinux_2_17_x86_64": 9, "manylinux2014_x86_64": 9, "dataclass": 9, "h5py": 9, "manylinux_2_12_x86_64": 9, "manylinux2010_x86_64": 9, "684": [9, 16, 31], "intervaltre": 9, "lilcom": 9, "numpi": 9, "15": [9, 10, 18, 19, 21, 28, 31], "40": [9, 19, 21, 24, 28, 29], "pyyaml": 9, "662": 9, "tqdm": 9, "62": [9, 21, 24], "76": [9, 31], "73": 9, "satisfi": 9, "dev": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "2a1410b": 9, "clean": [9, 16, 18, 21, 23, 24, 25, 26, 35, 36, 37], "toolz": 9, "55": [9, 19, 21, 28], "sortedcontain": 9, "29": [9, 16, 18, 19, 21, 24, 28, 29], "cffi": 9, "411": [9, 21], "pycpars": 9, "20": [9, 10, 16, 18, 19, 21, 23, 24, 28, 29, 31, 36], "112": 9, "pypars": 9, "67": 9, "done": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "filenam": [9, 12, 13, 14, 25, 26, 35, 37], "dev_2a1410b_clean": 9, "size": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "342242": 9, "sha256": 9, "f683444afa4dc0881133206b4646a": 9, "9d0f774224cc84000f55d0a67f6e4a37997": 9, "store": [9, 21], "ephem": 9, "ftu0qysz": 9, "7f": 9, "7a": 9, "8e": 9, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 9, "warn": 9, "built": 9, "invalid": [9, 21], "metadata": [9, 28, 29], "mandat": 9, "pep": 9, "440": 9, "packa": 9, "ging": 9, "deprec": [9, 18], "legaci": 9, "becaus": 9, "could": [9, 16, 19], "A": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 35, 36, 37], "discuss": 9, "regard": 9, "pypa": 9, "sue": 9, "8368": 9, "inter": 9, "valtre": 9, "sor": 9, "tedcontain": 9, "remot": 9, "enumer": 9, "500": [9, 10, 18, 21, 26, 35], "count": 9, "100": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "compress": 9, "308": [9, 16, 18, 19], "total": [9, 16, 18, 19, 21, 23, 24, 31, 35, 36], "delta": 9, "263": 9, "reus": 9, "307": 9, "102": [9, 16], "pack": [9, 36, 37], "receiv": 9, "172": 9, "49": [9, 21, 29, 31], 
"kib": 9, "385": 9, "00": [9, 16, 18, 19, 21, 24, 28, 29, 31], "resolv": 9, "kaldilm": 9, "tar": 9, "gz": 9, "48": [9, 16, 18], "574": 9, "kaldialign": 9, "sentencepiec": [9, 21], "96": 9, "41": [9, 16, 18, 28, 31], "absl": 9, "absl_pi": 9, "13": [9, 10, 18, 19, 21, 24, 25, 28], "132": 9, "googl": [9, 23, 25, 26, 35, 36, 37], "auth": 9, "oauthlib": 9, "google_auth_oauthlib": 9, "grpcio": 9, "24": [9, 19, 24, 28, 29, 31], "ment": 9, "12": [9, 10, 16, 18, 19, 21, 23, 25, 26, 28, 31, 35, 36, 37], "requi": 9, "rement": 9, "protobuf": 9, "manylinux_2_5_x86_64": 9, "werkzeug": 9, "288": 9, "tensorboard_data_serv": 9, "google_auth": 9, "35": [9, 10, 18, 21, 35], "152": 9, "request": [9, 33], "plugin": 9, "wit": 9, "tensorboard_plugin_wit": 9, "781": 9, "markdown": 9, "six": 9, "16": [9, 10, 14, 16, 18, 19, 21, 23, 24, 28, 29, 31, 35, 36, 37], "cachetool": 9, "rsa": 9, "pyasn1": 9, "pyasn1_modul": 9, "155": 9, "requests_oauthlib": 9, "77": [9, 21], "urllib3": 9, "27": [9, 16, 18, 24, 29], "138": [9, 16, 18], "certifi": 9, "2017": 9, "2021": [9, 16, 19, 21, 24, 28, 29, 31], "30": [9, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "145": 9, "charset": 9, "normal": [9, 24, 28, 29, 31, 36], "charset_norm": 9, "idna": 9, "59": [9, 19, 21], "146": 9, "897233": 9, "eccb906cafcd45bf9a7e1a1718e4534254bfb": 9, "f4c0d0cbc66eee6c88d68a63862": 9, "85": 9, "7d": 9, "63": [9, 18], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 9, "etool": 9, "oaut": 9, "hlib": 9, "let": [9, 16, 21, 35], "u": [9, 16, 18, 19, 21, 31, 35], "08": [9, 21, 24, 26, 28, 29, 31, 35], "19": [9, 10, 16, 21, 24, 28, 29], "main": [9, 16, 21, 33], "dl_dir": [9, 16, 19, 21, 23, 25, 26, 35, 36, 37], "waves_yesno": 9, "49mb": 9, "03": [9, 10, 18, 21, 28, 29, 35], "39mb": 9, "manifest": 9, "31": [9, 21], "42": [9, 16, 21, 31], "comput": [9, 10, 16, 18, 19, 23, 24, 26, 28, 29, 31, 35, 36, 37], "fbank": [9, 10, 16, 18, 19, 21, 24, 28, 29, 31], "32": [9, 16, 18, 19, 37], "803": 9, "info": [9, 10, 16, 18, 19, 21, 24, 28, 
29, 31], "compute_fbank_yesno": 9, "52": [9, 16, 21], "process": [9, 10, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "extract": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "featur": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "_______________________________________________________________": 9, "90": 9, "01": [9, 18, 19, 21], "80": [9, 10, 16, 18, 21], "57it": 9, "085": 9, "______________________________________________________________": 9, "248": [9, 18], "21it": 9, "lang": [9, 10, 18, 21, 26], "fcordre9": 9, "kaldilm_6899d26f2d684ad48f21025950cd2866": 9, "csrc": [9, 21], "arpa_file_pars": 9, "cc": 9, "void": 9, "arpafilepars": 9, "rea": 9, "d": [9, 28, 29], "std": 9, "istream": 9, "79": 9, "140": [9, 19], "gram": [9, 16, 18, 19, 23, 24, 26, 28, 29, 36, 37], "89": [9, 16], "hlg": [9, 24, 28, 29, 31], "928": 9, "compile_hlg": 9, "120": 9, "lang_phon": [9, 19, 24, 28, 29, 31], "929": [9, 18], "lexicon": [9, 16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "116": 9, "convert": [9, 21, 35], "l": [9, 18, 28, 29, 31], "pt": [9, 10, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "linv": [9, 18, 21, 31], "931": 9, "ctc_topo": 9, "max_token_id": 9, "932": 9, "load": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "fst": [9, 18, 31], "intersect": [9, 23, 36, 37], "933": 9, "lg": [9, 23, 26, 36, 37], "shape": 9, "66": 9, "connect": [9, 10, 21, 23, 24, 35, 36, 37], "68": [9, 21], "70": 9, "class": [9, 21], "tensor": [9, 16, 18, 19, 21, 23, 31, 35, 36], "71": [9, 21, 24], "determin": 9, "934": 9, "74": [9, 10], "_k2": 9, "raggedint": 9, "remov": [9, 16, 18, 19, 21, 24, 28, 29], "disambigu": 9, "symbol": [9, 18, 23, 36, 37], "87": 9, "remove_epsilon": 9, "935": 9, "92": [9, 21], "arc": 9, "95": [9, 17], "compos": 9, "h": 9, "105": [9, 21], "936": 9, "107": [9, 24], "123": 9, "now": [9, 16, 21, 23, 24, 25, 26, 28, 29, 35, 36, 37], "cuda_visible_devic": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], 
"gpu": [9, 16, 18, 19, 21, 23, 25, 26, 28, 29, 31, 35, 36, 37], "avail": [9, 10, 16, 18, 21, 24, 28, 29, 31, 35], "case": [9, 10, 23, 25, 26, 35, 36, 37], "segment": 9, "fault": 9, "core": 9, "dump": 9, "protocol_buffers_python_implement": 9, "more": [9, 16, 21, 31, 33, 35, 36], "674": 9, "interest": [9, 23, 25, 26, 35, 36, 37], "given": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 36, 37], "below": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36], "072": 9, "465": 9, "466": 9, "exp_dir": [9, 18, 21, 23, 25, 26, 36, 37], "posixpath": [9, 18, 21], "exp": [9, 10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "lang_dir": [9, 18, 21], "lr": [9, 18, 35], "feature_dim": [9, 10, 16, 18, 21, 31], "weight_decai": 9, "1e": 9, "06": [9, 10, 19, 21, 24, 31], "start_epoch": 9, "best_train_loss": [9, 10], "inf": [9, 10], "best_valid_loss": [9, 10], "best_train_epoch": [9, 10], "best_valid_epoch": [9, 10], "batch_idx_train": [9, 10], "log_interv": [9, 10], "valid_interv": [9, 10], "beam_siz": [9, 10, 18], "reduct": [9, 25], "sum": 9, "use_doub": 9, "le_scor": 9, "true": [9, 10, 16, 18, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "world_siz": 9, "master_port": 9, "12354": 9, "num_epoch": 9, "feature_dir": [9, 21], "max_dur": [9, 21], "bucketing_sampl": [9, 21], "num_bucket": [9, 21], "concatenate_cut": [9, 21], "duration_factor": [9, 21], "gap": [9, 21], "on_the_fly_feat": [9, 21], "shuffl": [9, 21], "return_cut": [9, 21], "num_work": [9, 21], "074": 9, "113": [9, 18, 21], "098": [9, 24], "cut": [9, 21], "240": [9, 16, 31], "149": [9, 21], "200": [9, 10, 16, 21, 28, 29, 31], "singlecutsampl": 9, "206": [9, 21], "219": [9, 18, 21], "246": [9, 18, 21, 28, 29], "357": 9, "416": 9, "epoch": [9, 10, 12, 13, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "batch": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "avg": [9, 10, 12, 13, 14, 18, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "loss": [9, 16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], 
"0789": 9, "848": 9, "5356": 9, "7556": 9, "301": [9, 10, 21], "432": [9, 21], "9972": 9, "best": [9, 16, 19, 21], "805": 9, "2436": 9, "5717": 9, "33": [9, 16, 17, 18, 21, 28], "109": [9, 16, 21], "4167": 9, "121": [9, 24], "325": 9, "2214": 9, "798": [9, 18], "0781": 9, "1343": 9, "065": 9, "0859": 9, "556": 9, "0421": 9, "0975": 9, "810": 9, "0431": 9, "824": 9, "657": 9, "0109": 9, "984": [9, 21], "0093": 9, "0096": 9, "50": [9, 10, 21, 23, 28, 35, 36, 37], "239": [9, 18], "0104": 9, "0101": 9, "569": 9, "0092": 9, "819": [9, 28], "835": 9, "51": [9, 16, 21, 31], "024": 9, "0105": 9, "317": 9, "0099": 9, "0097": 9, "552": 9, "0108": 9, "869": 9, "0102": 9, "126": [9, 21], "128": [9, 21], "537": [9, 21], "192": [9, 21], "249": 9, "250": [9, 18, 24], "lm_dir": [9, 21], "search_beam": [9, 16, 21, 31], "output_beam": [9, 16, 21, 31], "min_active_st": [9, 16, 21, 31], "max_active_st": [9, 16, 21, 31], "10000": [9, 16, 21, 31], "use_double_scor": [9, 16, 21, 31], "193": 9, "213": [9, 31], "259": [9, 16], "devic": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 36, 37], "217": [9, 16, 21], "279": [9, 21], "averag": [9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "userwarn": [9, 18], "floor_divid": 9, "futur": [9, 18, 38], "round": [9, 18], "toward": [9, 18], "trunc": [9, 18], "function": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "NOT": [9, 16, 18, 21, 31], "floor": [9, 18], "incorrect": [9, 18], "neg": [9, 18], "valu": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "keep": [9, 18, 23, 36, 37], "behavior": [9, 18], "div": [9, 18], "b": [9, 18, 21, 28, 29], "rounding_mod": [9, 18], "actual": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "divis": [9, 18], "trigger": 9, "intern": 9, "aten": 9, "src": [9, 35], "nativ": 9, "binaryop": 9, "cpp": [9, 13], "450": [9, 16, 18, 19], "k": [9, 23, 28, 29, 35, 36, 37], "n": [9, 16, 23, 25, 26, 28, 29, 35, 36, 37], "220": [9, 18, 19, 21], "409": 9, "190": [9, 24], "until": [9, 21, 25], "571": [9, 
21], "228": [9, 21], "transcript": [9, 16, 17, 18, 19, 21, 23, 24, 28, 29, 35, 36, 37], "recog": [9, 18, 21], "test_set": [9, 31], "572": 9, "ins": [9, 21, 31], "del": [9, 21, 31], "sub": [9, 21, 31], "573": 9, "236": 9, "wrote": [9, 21], "detail": [9, 11, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "stat": [9, 21], "err": [9, 18, 21], "299": 9, "congratul": [9, 16, 19, 21, 24, 28, 29, 31], "first": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "fun": 9, "debug": 9, "variou": [9, 15, 38], "problem": 9, "mai": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37, 38], "encount": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "period": 10, "disk": 10, "optim": [10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "other": [10, 18, 21, 23, 24, 28, 29, 31, 33, 36, 37, 38], "relat": [10, 16, 18, 21, 24, 28, 29, 31], "resum": [10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "howev": 10, "onli": [10, 12, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37, 38], "strip": 10, "except": 10, "reduc": [10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "each": [10, 16, 18, 19, 21, 23, 25, 26, 33, 35, 36, 37], "well": [10, 31, 38], "usag": [10, 13, 14, 24, 28, 29, 31], "pruned_transducer_stateless3": [10, 12, 13, 33], "almost": [10, 23, 33, 36, 37], "dir": [10, 12, 13, 14, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "bpe": [10, 12, 13, 14, 21, 23, 25, 26, 35, 36, 37], "lang_bpe_500": [10, 12, 13, 14, 21, 23, 25, 26, 35, 36, 37], "dict": 10, "host": 10, "csukuangfj": [10, 16, 18, 19, 21, 24, 28, 29, 31, 35], "prune": [10, 18, 20, 22, 32, 33, 34, 35, 37], "transduc": [10, 11, 12, 17, 20, 22, 32, 33, 34], "stateless3": 10, "2022": [10, 18, 23, 25, 26, 35, 36], "05": [10, 16, 18, 19, 21, 29], "lf": [10, 16, 18, 19, 21, 24, 26, 28, 29, 31], "repo": 10, "prefix": 10, "those": 10, "wave": [10, 12, 16, 21], "iter": [10, 14, 23, 25, 26, 35, 36, 37], "1224000": 10, "greedy_search": [10, 18, 23, 25, 35, 36, 37], 
"test_wav": [10, 16, 18, 19, 21, 24, 28, 29, 31], "1089": [10, 21, 24], "134686": [10, 21, 24], "0001": [10, 21, 24], "wav": [10, 12, 14, 16, 18, 19, 21, 23, 25, 26, 28, 29, 31, 35, 36, 37], "1221": [10, 21, 24], "135766": [10, 21, 24], "0002": [10, 21, 24], "multipl": [10, 16, 18, 19, 21, 24, 28, 29, 31], "sound": [10, 14, 16, 18, 19, 21, 24, 28, 29, 31], "Its": [10, 21], "output": [10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "09": [10, 16, 18, 19, 21, 35], "02": [10, 18, 21, 23, 29, 35, 36], "233": 10, "265": 10, "reset_interv": 10, "3000": 10, "subsampling_factor": [10, 16, 18, 21], "encoder_dim": 10, "512": [10, 16, 18, 21], "nhead": [10, 16, 18, 21, 23, 36], "dim_feedforward": [10, 18], "2048": [10, 18], "num_encoder_lay": [10, 18], "decoder_dim": 10, "joiner_dim": 10, "model_warm_step": 10, "env_info": [10, 16, 18, 21], "releas": [10, 16, 18, 21, 35], "sha1": [10, 16, 18, 21], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 10, "date": [10, 16, 18, 21], "fri": 10, "oct": [10, 21], "miss": [10, 18, 21], "cu102": 10, "branch": [10, 16, 18, 21, 25], "1013": 10, "c39cba5": 10, "dirti": [10, 16, 21], "thu": [10, 18, 21, 24], "jsonl": 10, "hostnam": [10, 18], "de": [10, 18], "74279": [10, 18], "0324160024": 10, "65bfd8b584": 10, "jjlbn": 10, "ip": [10, 18], "177": [10, 18, 19, 21], "203": [10, 21], "bpe_model": [10, 21], "sound_fil": [10, 16, 18, 21, 31], "sample_r": [10, 16, 18, 21, 31], "16000": [10, 16, 18, 19, 21, 24, 25, 28, 29], "beam": [10, 35], "max_context": 10, "max_stat": 10, "context_s": [10, 18], "max_sym_per_fram": [10, 18], "simulate_stream": 10, "decode_chunk_s": 10, "left_context": 10, "dynamic_chunk_train": 10, "causal_convolut": 10, "short_chunk_s": [10, 36, 37], "25": [10, 16, 21, 23, 28, 29, 31, 36], "num_left_chunk": 10, "blank_id": [10, 18], "unk_id": 10, "vocab_s": [10, 18], "271": 10, "273": [10, 18], "612": 10, "458": 10, "disabl": 10, "giga": [10, 35], "623": 10, "277": 10, "paramet": [10, 13, 16, 18, 19, 21, 23, 24, 25, 
26, 28, 29, 35, 36, 37], "78648040": 10, "951": [10, 21], "285": [10, 18, 21], "construct": [10, 16, 18, 19, 21, 24, 28, 29, 31], "952": 10, "295": [10, 16, 18, 19, 21], "957": 10, "700": 10, "329": [10, 21], "912": 10, "388": 10, "earli": [10, 21, 24], "nightfal": [10, 21, 24], "THE": [10, 21, 24], "yellow": [10, 21, 24], "lamp": [10, 21, 24], "light": [10, 21, 24], "here": [10, 16, 18, 19, 21, 24, 33, 35, 36], "AND": [10, 21, 24], "THERE": [10, 21, 24], "squalid": [10, 21, 24], "quarter": [10, 21, 24], "OF": [10, 21, 24], "brothel": [10, 21, 24], "god": [10, 21, 24], "AS": [10, 21, 24], "direct": [10, 21, 24], "consequ": [10, 21, 24], "sin": [10, 21, 24], "man": [10, 21, 24], "punish": [10, 21, 24], "had": [10, 21, 24], "her": [10, 21, 24], "love": [10, 21, 24], "child": [10, 21, 24], "whose": [10, 18, 21, 24], "ON": [10, 21, 24, 35], "THAT": [10, 21, 24], "dishonor": [10, 21, 24], "bosom": [10, 21, 24], "TO": [10, 21, 24], "parent": [10, 21, 24], "forev": [10, 21, 24], "WITH": [10, 21, 24], "race": [10, 21, 24], "descent": [10, 21, 24], "mortal": [10, 21, 24], "BE": [10, 21, 24], "bless": [10, 21, 24], "soul": [10, 21, 24], "IN": [10, 21, 24], "heaven": [10, 21, 24], "yet": [10, 21, 24], "THESE": [10, 21, 24], "thought": [10, 21, 24], "affect": [10, 21, 24], "hester": [10, 21, 24], "prynn": [10, 21, 24], "less": [10, 21, 24, 31, 36, 37], "hope": [10, 17, 21, 24], "than": [10, 16, 18, 19, 21, 23, 24, 25, 26, 31, 35, 36, 37], "apprehens": [10, 21, 24], "390": 10, "alwai": 10, "note": [10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "down": [10, 16, 21, 23, 25, 26, 35, 36, 37], "reproduc": [10, 21], "ln": [10, 16, 21, 23, 25, 26, 35, 36, 37], "9999": [10, 25, 26, 35], "symlink": 10, "pass": [10, 16, 18, 19, 21, 23, 25, 26, 33, 35, 36, 37], "max": [10, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "durat": [10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "600": [10, 21, 23, 25, 35, 36, 37], "reason": [10, 36], "support": [11, 12, 16, 18, 21, 23, 
25, 26, 33, 35, 36, 37], "perform": [11, 12, 18, 36], "raspberri": [11, 35], "pi": [11, 35], "project": 11, "static": [11, 35], "produc": [11, 23, 25, 26, 35, 36, 37], "binari": [11, 16, 18, 19, 21, 23, 31, 35, 36], "everyth": 11, "tree": [12, 13, 14, 16, 18, 19, 21, 24, 28, 29, 31, 35], "insid": [12, 14], "encod": [12, 14, 16, 18, 19, 21, 23, 24, 25, 31, 33, 35, 36, 37], "joiner": [12, 14, 18, 23, 35, 36, 37], "joiner_encoder_proj": 12, "joiner_decoder_proj": 12, "onnx_pretrain": 12, "proj": 12, "baz": [12, 14], "onnxruntim": 12, "our": [13, 14, 21, 23, 33, 36, 37], "torchscript": [13, 14, 35], "cpu_jit": [13, 16, 21, 23, 25, 26, 36, 37], "confus": 13, "move": [13, 23, 25, 26, 36, 37], "why": 13, "streaming_asr": [13, 14, 35, 36, 37], "emform": 13, "conv_emform": 13, "offline_asr": [13, 23], "lstm_transducer_stateless2": [14, 35], "468000": [14, 35], "three": [14, 16, 18, 33], "encoder_jit_trac": [14, 35, 37], "decoder_jit_trac": [14, 35, 37], "joiner_jit_trac": [14, 35, 37], "jit_pretrain": [14, 25, 26, 35], "tutori": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 35, 36, 37], "learn": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "singl": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "1best": [16, 19, 21, 24, 25, 26, 28, 29], "handl": [16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "automag": [16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "stop": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "control": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "By": [16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "default": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "execut": [16, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "mean": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 33, 35, 36, 37], "musan": [16, 19, 21, 23, 25, 26, 35, 36, 37], "sai": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "thei": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "won": [16, 19, 21, 23, 25, 26, 35, 36, 37], 
"re": [16, 19, 21, 23, 25, 26, 35, 36, 37], "intal": [16, 19], "initi": [16, 19], "sudo": [16, 19], "apt": [16, 19], "permiss": [16, 19], "commandlin": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "quit": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "often": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "experi": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "num": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "state": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "world": [16, 18, 19, 21, 23, 24, 25, 26, 35, 36, 37], "multi": [16, 18, 19, 21, 23, 25, 26, 33, 35, 36, 37], "machin": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "ddp": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "implement": [16, 18, 19, 21, 23, 25, 26, 33, 35, 36, 37], "present": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "later": [16, 19, 21, 23, 24, 25, 26, 28, 29, 35, 36, 37], "second": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "over": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "utter": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "pad": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "oom": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "v100": [16, 18, 19, 21], "nvidia": [16, 18, 19, 21], "due": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "usual": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "larger": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "caus": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "smaller": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "increas": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "tune": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "weight": [16, 19, 21, 25, 26, 35], "decai": [16, 19, 21, 25, 26, 35], "warmup": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "get_param": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "realli": [16, 19, 21, 23, 25, 26, 35, 36, 37], "directli": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "perturb": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "speed": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "factor": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "3x150": [16, 18, 19], "hour": [16, 18, 19, 21, 
23, 25, 26, 35, 36, 37], "These": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "rate": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "visual": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "logdir": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "labelsmooth": 16, "someth": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "tensorflow": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "found": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "continu": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "press": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "ctrl": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36, 37], "engw8ksktzqs24zbv5dgcg": 16, "22t11": 16, "scan": [16, 18, 19, 21, 23, 31, 35, 36], "116068": 16, "scalar": [16, 18, 19, 21, 23, 31, 35, 36], "listen": [16, 18, 19, 23, 31, 35, 36], "url": [16, 18, 19, 21, 23, 25, 26, 31, 35, 36], "xxxx": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "text": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "saw": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "consol": [16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "typic": [16, 18, 19, 21], "avoid": [16, 18, 21], "commonli": [16, 18, 19, 21, 24, 28, 29, 31], "nbest": [16, 21, 26], "scale": [16, 21, 24, 26, 28, 29], "lattic": [16, 19, 21, 23, 24, 28, 29, 36, 37], "score": [16, 21, 23, 36, 37], "uniqu": [16, 21, 23, 36, 37], "pkufool": [16, 19, 24], "icefall_asr_aishell_conformer_ctc": 16, "transcrib": [16, 18, 19, 21], "lang_char": [16, 18], "token": [16, 18, 19, 21, 24, 28, 29, 31], "word": [16, 18, 19, 21, 24, 28, 29, 31], "bac009s0764w0121": [16, 18, 19], "bac009s0764w0122": [16, 18, 19], "bac009s0764w0123": [16, 18, 19], "tran": [16, 19, 21, 24, 28, 29], "graph": [16, 19, 21, 23, 24, 28, 29, 36, 37], "id": [16, 19, 21, 24, 28, 29], "conveni": [16, 19, 21], "eo": [16, 19, 21], "easili": [16, 19, 21], "obtain": [16, 18, 19, 21, 24, 28, 29], "84": 16, "list": [16, 18, 19, 21, 24, 28, 29], "soxi": [16, 18, 19, 21, 
24, 31], "sampl": [16, 18, 19, 21, 24, 25, 31, 36, 37], "precis": [16, 18, 19, 21, 23, 24, 31, 36, 37], "bit": [16, 18, 19, 21, 24, 31], "04": [16, 18, 19, 21, 24, 28, 29], "67263": [16, 18, 19], "315": [16, 18, 19, 21, 24], "cdda": [16, 18, 19, 21, 24, 31], "sector": [16, 18, 19, 21, 24, 31], "135k": [16, 18, 19], "256k": [16, 18, 19, 21], "sign": [16, 18, 19, 21, 31], "integ": [16, 18, 19, 21, 31], "pcm": [16, 18, 19, 21, 31], "65840": [16, 18, 19], "625": [16, 18, 19], "132k": [16, 18, 19], "64000": [16, 18, 19], "300": [16, 18, 19, 21, 23, 36], "128k": [16, 18, 19, 31], "displai": [16, 18, 19, 21], "topologi": [16, 21], "07": [16, 18, 19, 21], "53": [16, 23, 24, 29, 35, 36], "707": [16, 21], "229": 16, "attention_dim": [16, 18, 21], "num_decoder_lay": [16, 21], "vgg_frontend": [16, 18, 21], "use_feat_batchnorm": [16, 21], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 16, "sun": 16, "sep": 16, "46": [16, 21], "33cfe45": 16, "d57a873": 16, "wed": [16, 18, 21], "nov": [16, 21], "hw": 16, "kangwei": 16, "icefall_aishell3": 16, "k2_releas": 16, "tokens_fil": 16, "words_fil": [16, 21, 31], "num_path": [16, 21, 23, 36, 37], "ngram_lm_scal": [16, 21], "attention_decoder_scal": [16, 21], "nbest_scal": [16, 21], "sos_id": [16, 21], "eos_id": [16, 21], "num_class": [16, 21, 31], "4336": [16, 18], "708": [16, 18, 21, 31], "242": [16, 21], "131": [16, 21], "134": 16, "269": [16, 28, 29], "275": 16, "241": 16, "293": [16, 21], "704": [16, 28], "369": [16, 21], "\u751a": [16, 18], "\u81f3": [16, 18], "\u51fa": [16, 18], "\u73b0": [16, 18], "\u4ea4": [16, 18], "\u6613": [16, 18], "\u51e0": [16, 18], "\u4e4e": [16, 18], "\u505c": [16, 18], "\u6b62": 16, "\u7684": [16, 18, 19], "\u60c5": [16, 18], "\u51b5": [16, 18], "\u4e00": [16, 18], "\u4e8c": [16, 18], "\u7ebf": [16, 18, 19], "\u57ce": [16, 18], "\u5e02": [16, 18], "\u867d": [16, 18], "\u7136": [16, 18], "\u4e5f": [16, 18, 19], "\u5904": [16, 18], "\u4e8e": [16, 18], "\u8c03": [16, 18], "\u6574": [16, 18], "\u4e2d": [16, 18, 
19], "\u4f46": [16, 18, 19], "\u56e0": [16, 18], "\u4e3a": [16, 18], "\u805a": [16, 18], "\u96c6": [16, 18], "\u4e86": [16, 18, 19], "\u8fc7": [16, 18], "\u591a": [16, 18], "\u516c": [16, 18], "\u5171": [16, 18], "\u8d44": [16, 18], "\u6e90": [16, 18], "371": 16, "37": [16, 18, 21, 28], "38": [16, 18, 21, 28], "683": 16, "47": [16, 21], "651": [16, 31], "654": 16, "659": 16, "752": 16, "321": 16, "887": 16, "340": 16, "370": 16, "\u751a\u81f3": [16, 19], "\u51fa\u73b0": [16, 19], "\u4ea4\u6613": [16, 19], "\u51e0\u4e4e": [16, 19], "\u505c\u6b62": 16, "\u60c5\u51b5": [16, 19], "\u4e00\u4e8c": [16, 19], "\u57ce\u5e02": [16, 19], "\u867d\u7136": [16, 19], "\u5904\u4e8e": [16, 19], "\u8c03\u6574": [16, 19], "\u56e0\u4e3a": [16, 19], "\u805a\u96c6": [16, 19], "\u8fc7\u591a": [16, 19], "\u516c\u5171": [16, 19], "\u8d44\u6e90": [16, 19], "372": 16, "recor": [16, 21], "highest": [16, 21], "965": 16, "966": 16, "821": 16, "822": 16, "826": 16, "916": 16, "115": [16, 21], "345": 16, "888": 16, "889": 16, "limit": [16, 18, 21, 33, 36], "memori": [16, 18, 21, 33], "upgrad": [16, 21], "pro": [16, 21], "finish": [16, 18, 19, 21, 23, 24, 28, 29, 31, 36, 37], "deploi": [16, 21], "At": [16, 21], "doe": [16, 18, 21, 31], "checkout": [16, 21], "v2": [16, 21], "cmake": [16, 21, 35], "dcmake_build_typ": [16, 21, 35], "j": [16, 21], "hlg_decod": [16, 21], "four": [16, 21], "messag": [16, 21, 23, 25, 26, 35, 36, 37], "nn_model": [16, 21], "use_gpu": [16, 21], "word_tabl": [16, 21], "caution": [16, 21], "forward": [16, 21, 25], "cu": [16, 21], "int": [16, 21], "char": [16, 21], "124": [16, 21], "98": 16, "142": [16, 19, 21], "150": [16, 21], "693": [16, 28], "165": [16, 21], "nnet_output": [16, 21], "182": [16, 24], "180": [16, 21], "489": 16, "45": [16, 18, 21], "216": [16, 21, 28, 29], "mandarin": 17, "corpu": 17, "beij": 17, "shell": 17, "technologi": 17, "ltd": 17, "400": 17, "peopl": 17, "accent": 17, "area": 17, "china": 17, "invit": 17, "particip": 17, "conduct": 17, "quiet": 17, 
"indoor": 17, "high": 17, "fidel": 17, "microphon": 17, "downsampl": 17, "16khz": 17, "manual": 17, "accuraci": 17, "through": 17, "profession": 17, "annot": 17, "strict": 17, "inspect": 17, "free": [17, 35], "academ": 17, "moder": 17, "amount": 17, "research": 17, "field": 17, "openslr": 17, "ctc": [17, 20, 22, 26, 27, 30], "stateless": [17, 20, 23, 35, 36, 37], "instead": [18, 36], "rnn": [18, 23, 25, 35, 36, 37], "As": [18, 21], "head": [18, 33], "dim": [18, 23, 36], "layer": [18, 23, 33, 35, 36, 37], "feedforward": [18, 23, 36], "embed": [18, 23, 35, 36, 37], "conv1d": [18, 23, 35, 36, 37], "kernel": 18, "left": [18, 36, 37], "context": [18, 23, 33, 35, 36, 37], "nn": [18, 23, 25, 26, 35, 36, 37], "tanh": 18, "linear": 18, "borrow": 18, "ieeexplor": 18, "ieee": 18, "stamp": 18, "jsp": 18, "arnumb": 18, "9054419": 18, "predict": [18, 23, 35, 36, 37], "modif": 18, "right": [18, 33, 36], "charact": 18, "unit": 18, "vocabulari": 18, "87939824": 18, "88": 18, "optimized_transduc": 18, "extra": [18, 33, 36], "technqiu": 18, "propos": [18, 33, 37], "improv": 18, "end": [18, 23, 25, 26, 31, 35, 36, 37], "furthermor": 18, "maximum": 18, "emit": 18, "per": [18, 23, 36, 37], "frame": [18, 23, 25, 36, 37], "simplifi": [18, 33], "significantli": 18, "degrad": 18, "exactli": 18, "benchmark": 18, "unprun": 18, "advantag": 18, "minim": 18, "pruned_transducer_stateless": [18, 23, 33, 36], "altern": 18, "though": 18, "transducer_stateless_modifi": 18, "option": [18, 24, 28, 29, 31], "pr": 18, "gb": 18, "ram": 18, "small": [18, 28, 29, 31], "tri": 18, "prob": [18, 35], "appli": [18, 33], "configur": [18, 24, 28, 29, 31], "c": [18, 19, 23, 25, 26, 31, 35, 36, 37], "lagz6hrcqxoigbfd5e0y3q": 18, "03t14": 18, "8477": 18, "sym": [18, 23, 36, 37], "beam_search": [18, 23, 36, 37], "decoding_method": 18, "beam_4": 18, "28": [18, 21, 24], "ensur": 18, "give": 18, "poor": 18, "531": [18, 19], "994": [18, 21], "176": [18, 21], "027": 18, "encoder_out_dim": 18, 
"f4fefe4882bc0ae59af951da3f47335d5495ef71": 18, "feb": 18, "50d2281": 18, "mar": 18, "0815224919": 18, "75d558775b": 18, "mmnv8": 18, "72": [18, 21], "878": [18, 29], "257": [18, 28, 29], "880": 18, "267": [18, 28, 29], "891": 18, "__floordiv__": 18, "length": [18, 36, 37], "x_len": 18, "163": [18, 21], "320": 18, "\u6ede": 18, "322": 18, "759": 18, "760": 18, "919": 18, "922": 18, "046": 18, "047": 18, "319": [18, 21], "214": [18, 21], "215": [18, 21, 24], "402": 18, "topk_hyp_index": 18, "topk_index": 18, "logit": 18, "583": [18, 29], "2000": 19, "lji9mwuorlow3jkdhxwk8a": 19, "13t11": 19, "4454": 19, "icefall_asr_aishell_tdnn_lstm_ctc": 19, "858": [19, 21], "389": [19, 21], "154": 19, "161": [19, 21], "536": 19, "171": [19, 21, 28, 29], "539": 19, "917": 19, "207": [19, 21], "129": 19, "\u505c\u6ede": 19, "222": [19, 21], "statelessx": [20, 22, 32, 33, 34], "zipform": [20, 22, 32, 34], "mmi": [20, 22], "blank": [20, 22], "skip": [20, 22, 23, 35, 36, 37], "ligru": [20, 27], "full": [21, 23, 25, 26, 35, 36, 37], "libri": [21, 23, 25, 26, 35, 36, 37], "960": [21, 23, 25, 26, 35, 36, 37], "subset": [21, 23, 25, 26, 35, 36, 37], "3x960": [21, 23, 25, 26, 35, 36, 37], "2880": [21, 23, 25, 26, 35, 36, 37], "lzgnetjwrxc3yghnmd4kpw": 21, "24t16": 21, "43": 21, "4540": 21, "sentenc": 21, "piec": 21, "And": [21, 23, 25, 26, 35, 36, 37], "neither": 21, "nor": 21, "vocab": 21, "work": 21, "5000": 21, "44": [21, 28, 29], "033": 21, "538": 21, "full_libri": 21, "406": 21, "464": 21, "548": 21, "776": 21, "652": [21, 31], "109226120": 21, "714": [21, 28], "473": 21, "944": 21, "1328": 21, "54": [21, 24, 28, 29], "443": [21, 24], "2563": 21, "56": [21, 28], "494": 21, "592": 21, "331": [21, 24], "1715": 21, "52576": 21, "1424": 21, "807": 21, "506": 21, "808": [21, 28], "522": 21, "362": 21, "565": 21, "1477": 21, "106": 21, "2922": 21, "208": 21, "4295": 21, "52343": 21, "396": 21, "3584": 21, "433": 21, "680": [21, 28], "_pickl": 21, "unpicklingerror": 21, "kei": 21, 
"hlg_modifi": 21, "g_4_gram": [21, 24, 28, 29], "106000": [21, 24], "496": [21, 24], "875": [21, 24], "212k": 21, "267440": [21, 24], "1253": [21, 24], "535k": 21, "83": [21, 24], "77200": [21, 24], "361": [21, 24], "154k": 21, "554": 21, "260": 21, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 21, "tue": 21, "8d93169": 21, "266": [21, 24], "268": [21, 24], "601": 21, "758": 21, "025": 21, "204": 21, "425": 21, "broffel": 21, "osom": 21, "427": 21, "723": 21, "775": 21, "881": 21, "352": 21, "234": 21, "384": 21, "whole": [21, 24, 28, 29, 36, 37], "ngram": [21, 24, 28, 29], "857": 21, "979": 21, "980": 21, "055": 21, "117": 21, "051": 21, "363": 21, "959": [21, 29], "546": 21, "598": 21, "599": [21, 24], "833": 21, "834": 21, "915": 21, "076": 21, "110": 21, "397": 21, "999": [21, 23, 36, 37], "concaten": 21, "bucket": 21, "sampler": 21, "1000": 21, "ctc_decod": 21, "ngram_lm_rescor": 21, "attention_rescor": 21, "kind": [21, 23, 25, 26, 35, 36, 37], "316": 21, "118": 21, "221": 21, "125": [21, 31], "136": 21, "144": 21, "159": [21, 31], "543": 21, "174": 21, "topo": 21, "547": 21, "729": 21, "111": 21, "702": 21, "703": 21, "545": 21, "122": 21, "280": 21, "135": [21, 31], "153": [21, 31], "945": 21, "475": 21, "191": [21, 28, 29], "398": 21, "199": [21, 24], "515": 21, "205": 21, "w": [21, 28, 29], "deseri": 21, "441": 21, "fsaclass": 21, "loadfsa": 21, "const": 21, "string": 21, "c10": 21, "ignor": 21, "dummi": 21, "589": 21, "attention_scal": 21, "656": 21, "162": 21, "169": [21, 28, 29], "188": 21, "624": 21, "519": [21, 29], "632": 21, "645": [21, 31], "243": 21, "970": 21, "303": 21, "179": 21, "suitabl": [23, 35, 36, 37], "pruned_transducer_stateless2": [23, 33, 36], "pruned_transducer_stateless4": [23, 33, 36], "pruned_transducer_stateless5": [23, 33, 36], "scroll": [23, 25, 26, 35, 36, 37], "scratch": [23, 25, 26, 35, 36, 37], "paper": [23, 35, 36, 37], "arxiv": [23, 35, 36, 37], "ab": [23, 35, 36, 37], "2206": [23, 35, 36, 37], "13236": [23, 35, 36, 37], 
"rework": [23, 33, 36], "daniel": [23, 36, 37], "joint": [23, 35, 36, 37], "contrari": [23, 35, 36, 37], "convent": [23, 35, 36, 37], "recurr": [23, 35, 36, 37], "fp16": [23, 25, 26, 35, 36, 37], "half": [23, 36, 37], "2x": [23, 36, 37], "dimens": [23, 36, 37], "littl": [23, 36], "allow": [23, 36], "436000": [23, 25, 26, 35, 36, 37], "438000": [23, 25, 26, 35, 36, 37], "qogspbgsr8kzcrmmie9jgw": 23, "20t15": [23, 35, 36], "4468": [23, 35, 36], "210171": [23, 35, 36], "access": [23, 25, 26, 35, 36, 37], "6008": [23, 25, 26, 35, 36, 37], "localhost": [23, 25, 26, 35, 36, 37], "expos": [23, 25, 26, 35, 36, 37], "proxi": [23, 25, 26, 35, 36, 37], "bind_al": [23, 25, 26, 35, 36, 37], "suggest": [23, 25, 26, 35, 36, 37], "both": [23, 25, 26, 35, 36, 37], "lowest": [23, 25, 26, 35, 36, 37], "fast_beam_search": [23, 25, 35, 36, 37], "474000": [23, 35, 36, 37], "largest": [23, 36, 37], "posterior": [23, 25, 36, 37], "algorithm": [23, 36, 37], "pdf": [23, 26, 36, 37], "1211": [23, 36, 37], "3711": [23, 36, 37], "espnet": [23, 36, 37], "net": [23, 36, 37], "beam_search_transduc": [23, 36, 37], "basicli": [23, 36, 37], "topk": [23, 36, 37], "expand": [23, 36, 37], "mode": [23, 36, 37], "being": [23, 36, 37], "hardcod": [23, 36, 37], "composit": [23, 36, 37], "between": [23, 36, 37], "log_prob": [23, 36, 37], "hard": [23, 33, 36, 37], "2211": [23, 36, 37], "00484": [23, 36, 37], "rnnt": [23, 36, 37], "effici": [23, 36, 37], "fast_beam_search_lg": [23, 36, 37], "trivial": [23, 36, 37], "fast_beam_search_nbest": [23, 36, 37], "random_path": [23, 36, 37], "shortest": [23, 36, 37], "fast_beam_search_nbest_lg": [23, 36, 37], "logic": [23, 36, 37], "includ": [23, 25, 26, 35, 36, 37], "But": [23, 25, 26, 35, 36, 37], "smallest": [23, 35, 36, 37], "icefall_asr_librispeech_tdnn": 24, "lstm_ctc": 24, "flac": 24, "116k": 24, "140k": 24, "343k": 24, "164k": 24, "105k": 24, "174k": 24, "pretraind": 24, "168": 24, "170": 24, "581": 24, "584": [24, 29], "209": 24, "791": 24, "245": 24, "099": 
24, "methond": [24, 28, 29], "725": 24, "403": 24, "631": 24, "010": 24, "guidanc": 25, "calcul": [25, 36, 37], "bigger": 25, "threshold": 25, "simpli": 25, "discard": 25, "prevent": 25, "convolut": [25, 33, 36], "similar": [25, 36, 37], "lconv": 25, "encourag": [25, 26, 35], "stabil": [25, 26], "doesn": 25, "warm": [25, 26], "pruned_transducer_stateless7_ctc_b": 25, "xyozukpeqm62hbilud4upa": [25, 26], "ctc_guild_decode_b": 25, "pretrained_ctc": 25, "jit_pretrained_ctc": 25, "yfyeung": 25, "wechat": 26, "zipformer_mmi": 26, "worker": [26, 35], "hp": 26, "zengwei": [26, 35], "tdnn_ligru_ctc": 28, "enough": [28, 29, 31], "luomingshuang": [28, 29], "icefall_asr_timit_tdnn_ligru_ctc": 28, "pretrained_average_9_25": 28, "fdhc0_si1559": [28, 29], "felc0_si756": [28, 29], "fmgd0_si1564": [28, 29], "ffprobe": [28, 29], "show_format": [28, 29], "nistspher": [28, 29], "database_id": [28, 29], "database_vers": [28, 29], "utterance_id": [28, 29], "dhc0_si1559": [28, 29], "sample_min": [28, 29], "4176": [28, 29], "sample_max": [28, 29], "5984": [28, 29], "bitrat": [28, 29], "258": [28, 29], "audio": [28, 29], "pcm_s16le": [28, 29], "hz": [28, 29], "s16": [28, 29], "256": [28, 29], "elc0_si756": [28, 29], "1546": [28, 29], "1989": [28, 29], "mgd0_si1564": [28, 29], "7626": [28, 29], "10573": [28, 29], "660": 28, "183": [28, 29], "695": 28, "697": 28, "210": [28, 29], "829": 28, "sil": [28, 29], "dh": [28, 29], "ih": [28, 29], "uw": [28, 29], "ah": [28, 29], "ii": [28, 29], "z": [28, 29], "aa": [28, 29], "ei": [28, 29], "dx": [28, 29], "uh": [28, 29], "ng": [28, 29], "th": [28, 29], "eh": [28, 29], "jh": [28, 29], "er": [28, 29], "ai": [28, 29], "hh": [28, 29], "aw": 28, "ae": [28, 29], "705": 28, "715": 28, "720": 28, "251": [28, 29], "348": 28, "ch": 28, "icefall_asr_timit_tdnn_lstm_ctc": 29, "pretrained_average_16_25": 29, "816": 29, "827": 29, "387": 29, "unk": 29, "739": 29, "971": 29, "977": 29, "978": 29, "981": 29, "ow": 29, "ykubhb5wrmosxykid1z9eg": 31, "23t23": 31, 
"sinc": [31, 35], "icefall_asr_yesno_tdnn": 31, "l_disambig": 31, "lexicon_disambig": 31, "arpa": 31, "0_0_0_1_0_0_0_1": 31, "0_0_1_0_0_0_1_0": 31, "0_0_1_0_0_1_1_1": 31, "0_0_1_0_1_0_0_1": 31, "0_0_1_1_0_0_0_1": 31, "0_0_1_1_0_1_1_0": 31, "0_0_1_1_1_0_0_0": 31, "0_0_1_1_1_1_0_0": 31, "0_1_0_0_0_1_0_0": 31, "0_1_0_0_1_0_1_0": 31, "0_1_0_1_0_0_0_0": 31, "0_1_0_1_1_1_0_0": 31, "0_1_1_0_0_1_1_1": 31, "0_1_1_1_0_0_1_0": 31, "0_1_1_1_1_0_1_0": 31, "1_0_0_0_0_0_0_0": 31, "1_0_0_0_0_0_1_1": 31, "1_0_0_1_0_1_1_1": 31, "1_0_1_1_0_1_1_1": 31, "1_0_1_1_1_1_0_1": 31, "1_1_0_0_0_1_1_1": 31, "1_1_0_0_1_0_1_1": 31, "1_1_0_1_0_1_0_0": 31, "1_1_0_1_1_0_0_1": 31, "1_1_0_1_1_1_1_0": 31, "1_1_1_0_0_1_0_1": 31, "1_1_1_0_1_0_1_0": 31, "1_1_1_1_0_0_1_0": 31, "1_1_1_1_1_0_0_0": 31, "1_1_1_1_1_1_1_1": 31, "54080": 31, "507": 31, "108k": 31, "No": 31, "ye": 31, "hebrew": 31, "NO": 31, "621": 31, "119": 31, "127": 31, "650": 31, "139": 31, "143": 31, "198": 31, "181": 31, "186": 31, "187": 31, "287": 31, "correctli": 31, "simplest": 31, "former": 33, "idea": 33, "achiev": 33, "mask": [33, 36, 37], "wenet": 33, "did": 33, "argument": 33, "adapt": 33, "complic": 33, "techniqu": 33, "bank": 33, "compon": 33, "memor": 33, "histori": 33, "introduc": 33, "variant": 33, "pruned_stateless_emformer_rnnt2": 33, "conv_emformer_transducer_stateless": 33, "convemform": 33, "ourself": 33, "mechan": 33, "conv_emformer_transducer_stateless2": 33, "onlin": 35, "lstm_transducer_stateless": 35, "architectur": 35, "lower": 35, "prepare_giga_speech": 35, "cj2vtpiwqhkn9q1tx6ptpg": 35, "hidden": 35, "1024": 35, "pnnx": 35, "submodul": 35, "updat": 35, "recurs": 35, "init": 35, "dncnn_python": 35, "dncnn_build_benchmark": 35, "off": 35, "dncnn_build_exampl": 35, "dncnn_build_tool": 35, "j4": 35, "pwd": 35, "quantiz": 35, "abl": 35, "ncnn2int8": 35, "third": 35, "param": 35, "extern": 35, "stateless2": 35, "compar": 36, "dynam": [36, 37], "chunk": [36, 37], "causal": 36, "short": [36, 37], "2012": 36, "05481": 36, 
"flag": 36, "indic": [36, 37], "whether": 36, "must": 36, "sequenc": [36, 37], "uniformli": [36, 37], "seen": [36, 37], "97vkxf80ru61cnp2alwzzg": 36, "streaming_decod": [36, 37], "acoust": [36, 37], "wise": [36, 37], "subsampl": [36, 37], "equal": [36, 37], "where": 36, "parallel": [36, 37], "bath": [36, 37], "parallelli": [36, 37], "seem": 36, "benefit": 36, "might": [36, 37], "mismatch": 36, "yourself": [36, 37], "mdoel": 36, "pruned_transducer_stateless7_stream": 37, "len": 37, "320m": 37, "550": 37, "scriptmodul": 37, "jit_trace_export": 37, "jit_trace_pretrain": 37, "task": 38}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 10, 12, 13, 14], "creat": [2, 9], "recip": [2, 38], "data": [2, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "prepar": [2, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "train": [2, 6, 9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "decod": [2, 9, 10, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "pre": [2, 6, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "model": [2, 6, 10, 12, 13, 14, 15, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "frequent": 4, "ask": 4, "question": 4, "faq": 4, "oserror": 4, "libtorch_hip": 4, "so": 4, "cannot": 4, "open": 4, "share": 4, "object": 4, "file": 4, "directori": 4, "attributeerror": 4, "modul": 4, "distutil": 4, "ha": 4, "attribut": 4, "version": 4, "huggingfac": [5, 7], "space": 7, "youtub": [7, 9], "video": [7, 9], "icefal": [8, 9], "content": [8, 38], "instal": [9, 16, 18, 19, 21, 24, 28, 29], "0": 9, "pytorch": 9, "torchaudio": 9, "1": [9, 16, 18, 19, 21], "k2": 9, "2": [9, 16, 18, 19, 21], "lhots": 9, "3": [9, 16, 18, 21], "download": [9, 16, 18, 19, 21, 23, 24, 25, 26, 28, 29, 31, 35, 36, 37], "exampl": [9, 16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "virtual": 9, "environ": 9, "activ": 9, "your": 9, "4": 9, "5": 9, "test": 9, 
"export": [10, 11, 12, 13, 14, 15, 23, 25, 26, 35, 36, 37], "state_dict": [10, 23, 25, 26, 35, 36, 37], "when": [10, 12, 13, 14], "us": [10, 12, 13, 14, 23, 25, 26, 35, 36, 37], "run": 10, "py": 10, "ncnn": [11, 35], "onnx": 12, "torch": [13, 14, 23, 25, 26, 35, 36, 37], "jit": [13, 14, 23, 25, 26, 35, 36, 37], "script": [13, 23, 25, 26, 36, 37], "trace": [14, 35, 37], "conform": [16, 21, 33], "ctc": [16, 19, 21, 24, 25, 28, 29, 31], "configur": [16, 19, 21, 23, 25, 26, 35, 36, 37], "option": [16, 19, 21, 23, 25, 26, 35, 36, 37], "log": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "usag": [16, 18, 19, 21, 23, 25, 26, 35, 36, 37], "case": [16, 18, 19, 21], "kaldifeat": [16, 18, 19, 21, 24, 28, 29, 31], "hlg": [16, 19, 21], "attent": [16, 21], "rescor": [16, 21], "colab": [16, 18, 19, 21, 24, 28, 29, 31], "notebook": [16, 18, 19, 21, 24, 28, 29, 31], "deploy": [16, 21], "c": [16, 21], "aishel": 17, "stateless": 18, "transduc": [18, 23, 35, 36, 37], "The": 18, "loss": 18, "todo": 18, "greedi": 18, "search": 18, "beam": 18, "modifi": 18, "tdnn": [19, 24, 28, 29, 31], "lstm": [19, 24, 29, 35], "non": 20, "stream": [20, 32, 33, 36, 37], "asr": [20, 32], "lm": 21, "comput": 21, "wer": 21, "n": 21, "gram": 21, "librispeech": [22, 34], "prune": [23, 36], "statelessx": [23, 36], "pretrain": [23, 25, 26, 35, 36, 37], "deploi": [23, 36, 37], "sherpa": [23, 36, 37], "infer": [24, 28, 29, 31], "zipform": [25, 26, 37], "blank": 25, "skip": 25, "mmi": 26, "timit": 27, "ligru": 28, "yesno": 30, "introduct": 33, "emform": 33, "which": 35, "simul": [36, 37], "real": [36, 37], "tabl": 38}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], 
"Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [18, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [16, "training"], [18, "training"], [19, "training"], [21, "training"], [23, "training"], [24, "training"], [25, "training"], [26, "training"], [28, "training"], [29, "training"], [31, "training"], [35, "training"], [36, "training"], [37, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [16, "decoding"], [18, "decoding"], [19, "decoding"], [21, "decoding"], [23, "decoding"], [24, "decoding"], [25, "decoding"], [26, "decoding"], [28, "decoding"], [29, "decoding"], [31, "decoding"], [35, "decoding"], [36, "decoding"], [37, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "Frequently Asked Questions (FAQs)": [[4, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[4, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[4, "attributeerror-module-distutils-has-no-attribute-version"]], "Huggingface": [[5, "huggingface"]], "Pre-trained models": [[6, "pre-trained-models"]], "Huggingface spaces": [[7, "huggingface-spaces"]], "YouTube Video": [[7, "youtube-video"], [9, "youtube-video"]], "Icefall": [[8, "icefall"]], "Contents:": [[8, null]], "Installation": [[9, "installation"]], "(0) Install PyTorch and torchaudio": [[9, "install-pytorch-and-torchaudio"]], "(1) Install k2": [[9, "install-k2"]], "(2) Install lhotse": [[9, "install-lhotse"]], "(3) Download icefall": [[9, "download-icefall"]], "Installation example": [[9, "installation-example"]], "(1) Create a virtual environment": [[9, "create-a-virtual-environment"]], "(2) Activate your virtual environment": [[9, 
"activate-your-virtual-environment"]], "(3) Install k2": [[9, "id1"]], "(4) Install lhotse": [[9, "id2"]], "(5) Download icefall": [[9, "id3"]], "Test Your Installation": [[9, "test-your-installation"]], "Data preparation": [[9, "data-preparation"], [16, "data-preparation"], [19, "data-preparation"], [21, "data-preparation"], [23, "data-preparation"], [24, "data-preparation"], [25, "data-preparation"], [26, "data-preparation"], [28, "data-preparation"], [29, "data-preparation"], [31, "data-preparation"], [35, "data-preparation"], [36, "data-preparation"], [37, "data-preparation"]], "Export model.state_dict()": [[10, "export-model-state-dict"], [23, "export-model-state-dict"], [25, "export-model-state-dict"], [26, "export-model-state-dict"], [35, "export-model-state-dict"], [36, "export-model-state-dict"], [37, "export-model-state-dict"]], "When to use it": [[10, "when-to-use-it"], [12, "when-to-use-it"], [13, "when-to-use-it"], [14, "when-to-use-it"]], "How to export": [[10, "how-to-export"], [12, "how-to-export"], [13, "how-to-export"], [14, "how-to-export"]], "How to use the exported model": [[10, "how-to-use-the-exported-model"], [12, "how-to-use-the-exported-model"], [13, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[10, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[11, "export-to-ncnn"]], "Export to ONNX": [[12, "export-to-onnx"]], "Export model with torch.jit.script()": [[13, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[14, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[14, "how-to-use-the-exported-models"]], "Model export": [[15, "model-export"]], "Conformer CTC": [[16, "conformer-ctc"], [21, "conformer-ctc"]], "Configurable options": [[16, "configurable-options"], [19, "configurable-options"], [21, "configurable-options"], [23, "configurable-options"], [25, "configurable-options"], [26, "configurable-options"], [35, "configurable-options"], 
[36, "configurable-options"], [37, "configurable-options"]], "Pre-configured options": [[16, "pre-configured-options"], [19, "pre-configured-options"], [21, "pre-configured-options"], [23, "pre-configured-options"], [25, "pre-configured-options"], [26, "pre-configured-options"], [35, "pre-configured-options"], [36, "pre-configured-options"], [37, "pre-configured-options"]], "Training logs": [[16, "training-logs"], [18, "training-logs"], [19, "training-logs"], [21, "training-logs"], [23, "training-logs"], [25, "training-logs"], [26, "training-logs"], [35, "training-logs"], [36, "training-logs"], [37, "training-logs"]], "Usage examples": [[16, "usage-examples"], [18, "usage-examples"], [19, "usage-examples"], [21, "usage-examples"]], "Case 1": [[16, "case-1"], [18, "case-1"], [19, "case-1"], [21, "case-1"]], "Case 2": [[16, "case-2"], [18, "case-2"], [19, "case-2"], [21, "case-2"]], "Case 3": [[16, "case-3"], [18, "case-3"], [21, "case-3"]], "Pre-trained Model": [[16, "pre-trained-model"], [18, "pre-trained-model"], [19, "pre-trained-model"], [21, "pre-trained-model"], [24, "pre-trained-model"], [28, "pre-trained-model"], [29, "pre-trained-model"], [31, "pre-trained-model"]], "Install kaldifeat": [[16, "install-kaldifeat"], [18, "install-kaldifeat"], [19, "install-kaldifeat"], [21, "install-kaldifeat"], [24, "install-kaldifeat"], [28, "install-kaldifeat"], [29, "install-kaldifeat"]], "Download the pre-trained model": [[16, "download-the-pre-trained-model"], [18, "download-the-pre-trained-model"], [19, "download-the-pre-trained-model"], [21, "download-the-pre-trained-model"], [24, "download-the-pre-trained-model"], [28, "download-the-pre-trained-model"], [29, "download-the-pre-trained-model"], [31, "download-the-pre-trained-model"]], "Usage": [[16, "usage"], [18, "usage"], [19, "usage"], [21, "usage"]], "CTC decoding": [[16, "ctc-decoding"], [21, "ctc-decoding"], [21, "id2"]], "HLG decoding": [[16, "hlg-decoding"], [16, "id2"], [19, "hlg-decoding"], [21, 
"hlg-decoding"], [21, "id3"]], "HLG decoding + attention decoder rescoring": [[16, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[16, "colab-notebook"], [18, "colab-notebook"], [19, "colab-notebook"], [21, "colab-notebook"], [24, "colab-notebook"], [28, "colab-notebook"], [29, "colab-notebook"], [31, "colab-notebook"]], "Deployment with C++": [[16, "deployment-with-c"], [21, "deployment-with-c"]], "aishell": [[17, "aishell"]], "Stateless Transducer": [[18, "stateless-transducer"]], "The Model": [[18, "the-model"]], "The Loss": [[18, "the-loss"]], "Todo": [[18, "id1"]], "Greedy search": [[18, "greedy-search"]], "Beam search": [[18, "beam-search"]], "Modified Beam search": [[18, "modified-beam-search"]], "TDNN-LSTM CTC": [[19, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[20, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[21, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[21, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[21, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[21, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[21, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "LibriSpeech": [[22, "librispeech"], [34, "librispeech"]], "Pruned transducer statelessX": [[23, "pruned-transducer-statelessx"], [36, "pruned-transducer-statelessx"]], "Usage example": [[23, "usage-example"], [25, "usage-example"], [26, "usage-example"], [35, "usage-example"], [36, "usage-example"], [37, "usage-example"]], "Export Model": [[23, "export-model"], [36, "export-model"], [37, "export-model"]], "Export model using torch.jit.script()": [[23, "export-model-using-torch-jit-script"], [25, "export-model-using-torch-jit-script"], [26, "export-model-using-torch-jit-script"], [36, "export-model-using-torch-jit-script"], [37, 
"export-model-using-torch-jit-script"]], "Download pretrained models": [[23, "download-pretrained-models"], [25, "download-pretrained-models"], [26, "download-pretrained-models"], [35, "download-pretrained-models"], [36, "download-pretrained-models"], [37, "download-pretrained-models"]], "Deploy with Sherpa": [[23, "deploy-with-sherpa"], [36, "deploy-with-sherpa"], [37, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[24, "tdnn-lstm-ctc"], [29, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[24, "inference-with-a-pre-trained-model"], [28, "inference-with-a-pre-trained-model"], [29, "inference-with-a-pre-trained-model"], [31, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[25, "zipformer-ctc-blank-skip"]], "Export models": [[25, "export-models"], [26, "export-models"], [35, "export-models"]], "Zipformer MMI": [[26, "zipformer-mmi"]], "TIMIT": [[27, "timit"]], "TDNN-LiGRU-CTC": [[28, "tdnn-ligru-ctc"]], "YesNo": [[30, "yesno"]], "TDNN-CTC": [[31, "tdnn-ctc"]], "Download kaldifeat": [[31, "download-kaldifeat"]], "Streaming ASR": [[32, "streaming-asr"]], "Introduction": [[33, "introduction"]], "Streaming Conformer": [[33, "streaming-conformer"]], "Streaming Emformer": [[33, "streaming-emformer"]], "LSTM Transducer": [[35, "lstm-transducer"]], "Which model to use": [[35, "which-model-to-use"]], "Export model using torch.jit.trace()": [[35, "export-model-using-torch-jit-trace"], [37, "export-model-using-torch-jit-trace"]], "Export model for ncnn": [[35, "export-model-for-ncnn"]], "Simulate streaming decoding": [[36, "simulate-streaming-decoding"], [37, "simulate-streaming-decoding"]], "Real streaming decoding": [[36, "real-streaming-decoding"], [37, "real-streaming-decoding"]], "Zipformer Transducer": [[37, "zipformer-transducer"]], "Recipes": [[38, "recipes"]], "Table of Contents": [[38, null]]}, "indexentries": {}})
\ No newline at end of file