diff --git a/_sources/for-dummies/model-export.rst.txt b/_sources/for-dummies/model-export.rst.txt index 352a0dc90..a3dd9088f 100644 --- a/_sources/for-dummies/model-export.rst.txt +++ b/_sources/for-dummies/model-export.rst.txt @@ -41,7 +41,7 @@ To give you an idea of what ``tdnn/exp/pretrained.pt`` contains, we can use the .. code-block:: python3 >>> import torch - >>> m = torch.load("tdnn/exp/pretrained.pt") + >>> m = torch.load("tdnn/exp/pretrained.pt", weights_only=False) >>> list(m.keys()) ['model'] >>> list(m["model"].keys()) diff --git a/for-dummies/model-export.html b/for-dummies/model-export.html index 096ca3636..b14c8434c 100644 --- a/for-dummies/model-export.html +++ b/for-dummies/model-export.html @@ -134,7 +134,7 @@

We can see from the logs that the exported model is saved to the file tdnn/exp/pretrained.pt.

To give you an idea of what tdnn/exp/pretrained.pt contains, we can use the following command:

>>> import torch
->>> m = torch.load("tdnn/exp/pretrained.pt")
+>>> m = torch.load("tdnn/exp/pretrained.pt", weights_only=False)
 >>> list(m.keys())
 ['model']
 >>> list(m["model"].keys())
diff --git a/searchindex.js b/searchindex.js
index b95a7ae67..774866392 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "decoding-with-langugage-models/LODR", "decoding-with-langugage-models/index", "decoding-with-langugage-models/rescoring", "decoding-with-langugage-models/shallow-fusion", "docker/index", "docker/intro", "faqs", "for-dummies/data-preparation", "for-dummies/decoding", "for-dummies/environment-setup", "for-dummies/index", "for-dummies/model-export", "for-dummies/training", "fst-based-forced-alignment/diff", "fst-based-forced-alignment/index", "fst-based-forced-alignment/k2-based", "fst-based-forced-alignment/kaldi-based", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Finetune/adapter/finetune_adapter", "recipes/Finetune/from_supervised/finetune_zipformer", "recipes/Finetune/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/RNN-LM/index", "recipes/RNN-LM/librispeech/lm-training", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/TTS/index", "recipes/TTS/ljspeech/vits", "recipes/TTS/vctk/vits", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "decoding-with-langugage-models/LODR.rst", "decoding-with-langugage-models/index.rst", "decoding-with-langugage-models/rescoring.rst", "decoding-with-langugage-models/shallow-fusion.rst", "docker/index.rst", "docker/intro.rst", "faqs.rst", "for-dummies/data-preparation.rst", "for-dummies/decoding.rst", "for-dummies/environment-setup.rst", "for-dummies/index.rst", "for-dummies/model-export.rst", "for-dummies/training.rst", "fst-based-forced-alignment/diff.rst", "fst-based-forced-alignment/index.rst", "fst-based-forced-alignment/k2-based.rst", "fst-based-forced-alignment/kaldi-based.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Finetune/adapter/finetune_adapter.rst", "recipes/Finetune/from_supervised/finetune_zipformer.rst", "recipes/Finetune/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/RNN-LM/index.rst", "recipes/RNN-LM/librispeech/lm-training.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/TTS/index.rst", "recipes/TTS/ljspeech/vits.rst", "recipes/TTS/vctk/vits.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "LODR for RNN Transducer", "Decoding with language models", "LM rescoring for Transducer", "Shallow fusion for Transducer", "Docker", "Introduction", "Frequently Asked Questions (FAQs)", "Data Preparation", "Decoding", "Environment setup", "Icefall for dummies tutorial", "Model Export", "Training", "Two approaches", "FST-based forced alignment", "k2-based forced alignment", "Kaldi-based forced alignment", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Finetune from a pre-trained Zipformer model with adapters", "Finetune from a supervised pre-trained Zipformer model", "Fine-tune a pre-trained model", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "RNN-LM", "Train an RNN language model", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "TTS", "VITS-LJSpeech", "VITS-VCTK", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "us": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 41, 43, 44, 47, 51, 52, 54, 56, 58, 64, 65], "tool": [0, 10, 25, 28], "make": [0, 1, 3, 20, 28, 29, 30, 35, 38, 40, 43, 58], "consist": [0, 40, 46, 60, 61, 62], "possibl": [0, 2, 3, 38, 43], "black": 0, "format": [0, 28, 29, 30, 35, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "flake8": 0, "check": [0, 25, 43, 56, 64], "qualiti": [0, 39], "isort": 0, "sort": [0, 25, 56], "import": [0, 9, 10, 15, 20, 25, 28, 61, 62], "The": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 26, 28, 29, 30, 35, 36, 38, 39, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "version": [0, 9, 13, 15, 24, 25, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 51, 52, 61], "abov": [0, 4, 6, 7, 10, 13, 15, 20, 26, 28, 29, 30, 31, 38, 39, 40, 41, 43, 46, 48, 49, 54, 58, 60, 61, 62, 64], "ar": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 25, 26, 28, 29, 30, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "22": [0, 9, 15, 20, 25, 28, 29, 35, 43, 51, 52, 54], "3": [0, 4, 6, 7, 9, 10, 11, 15, 20, 24, 26, 27, 31, 34, 35, 41, 44, 46, 47, 48, 49, 54, 56, 60, 61, 62, 64, 65], "0": [0, 1, 4, 6, 7, 9, 11, 13, 15, 20, 24, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "5": [0, 7, 15, 20, 27, 34, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "4": [0, 4, 5, 6, 7, 9, 10, 11, 13, 15, 20, 24, 26, 27, 34, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "10": [0, 7, 9, 15, 20, 24, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "1": [0, 4, 6, 7, 9, 11, 13, 15, 20, 24, 26, 27, 31, 32, 33, 34, 35, 36, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "after": [0, 1, 6, 9, 11, 12, 13, 16, 23, 25, 26, 28, 29, 30, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64], "run": [0, 2, 8, 10, 11, 13, 14, 15, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 65], "command": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 20, 25, 26, 28, 29, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "git": [0, 4, 6, 7, 9, 13, 15, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 51, 52, 54, 56], "clone": [0, 4, 6, 7, 9, 13, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 51, 52, 54, 56], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 13, 15, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "github": [0, 2, 6, 9, 11, 13, 15, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "com": [0, 2, 6, 9, 11, 13, 22, 23, 25, 26, 28, 29, 32, 33, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "k2": [0, 2, 9, 10, 13, 15, 17, 18, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 60, 61, 62, 64], "fsa": [0, 2, 9, 13, 15, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 38, 40, 43, 46, 48, 49, 60, 61, 62, 64], "icefal": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 20, 22, 23, 26, 27, 31, 32, 33, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "cd": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "pip": [0, 1, 6, 10, 13, 15, 25, 28, 31, 40, 64], "instal": [0, 1, 4, 6, 10, 14, 15, 17, 20, 21, 23, 24, 26, 27, 31, 34, 35, 36, 44, 46, 48, 49, 54, 60, 61, 62, 63], "pre": [0, 3, 4, 6, 7, 8, 9, 15, 21, 23, 24, 25, 27, 34, 44, 64, 66], "commit": [0, 25], "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 22, 23, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "automat": [0, 14, 23, 44], "hook": 0, "invok": 0, "fail": [0, 20], "If": [0, 2, 4, 6, 7, 8, 9, 10, 11, 13, 15, 20, 23, 28, 29, 30, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "ani": [0, 4, 6, 7, 13, 20, 25, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61], "your": [0, 1, 2, 4, 6, 7, 9, 11, 13, 20, 21, 23, 24, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64], "wa": [0, 26, 43, 47], "success": [0, 25, 28, 29], "pleas": [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 23, 25, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "fix": [0, 9, 10, 13, 28, 29, 30, 43], "issu": [0, 4, 6, 7, 10, 25, 28, 29, 43, 44, 61, 62], "report": [0, 9, 10, 35, 44], "some": [0, 1, 4, 6, 9, 26, 28, 29, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "i": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 26, 27, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64], "e": [0, 2, 4, 5, 6, 7, 13, 20, 28, 29, 30, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "modifi": [0, 20, 27, 34, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "file": [0, 2, 9, 14, 15, 18, 23, 24, 26, 28, 29, 30, 32, 33, 34, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 65], "place": [0, 25, 26, 40, 43, 47], "so": [0, 4, 6, 7, 9, 13, 23, 24, 25, 26, 28, 29, 30, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "statu": 0, "failur": 0, "see": [0, 1, 6, 7, 9, 15, 23, 25, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "which": [0, 2, 4, 6, 7, 9, 11, 12, 15, 17, 23, 25, 26, 28, 29, 30, 31, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 61, 62, 64], "ha": [0, 2, 17, 24, 25, 27, 28, 29, 30, 31, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 58, 60, 61, 62], "been": [0, 25, 27, 28, 29, 30, 40], "befor": [0, 1, 11, 13, 15, 20, 25, 26, 28, 29, 30, 31, 32, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "further": [0, 4, 6, 7, 15], "chang": [0, 4, 6, 7, 10, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "all": [0, 9, 11, 13, 14, 17, 20, 22, 23, 26, 28, 29, 30, 32, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "again": [0, 28, 29, 54], "should": [0, 2, 4, 6, 11, 13, 20, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64], "succe": 0, "thi": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 18, 20, 21, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "time": [0, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "succeed": [0, 20], "want": [0, 4, 6, 7, 11, 13, 15, 25, 26, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "can": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "do": [0, 2, 4, 6, 13, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "Or": 0, "without": [0, 4, 6, 7, 9, 15, 17, 20, 21, 23, 38, 43], "your_changed_fil": 0, "py": [0, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 20, 25, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "sphinx": 1, "write": [1, 2, 3, 20], "have": [1, 2, 4, 6, 7, 8, 9, 11, 13, 20, 22, 23, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "prepar": [1, 3, 4, 8, 14, 16, 18, 24, 26, 37, 63], "environ": [1, 10, 11, 12, 14, 16, 18, 24, 28, 29, 30, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 51, 52, 54, 61, 62], "doc": [1, 26, 58], "r": [1, 13, 20, 25, 28, 29, 30, 51, 52], "requir": [1, 4, 6, 11, 13, 15, 25, 30, 35, 36, 44, 56, 61, 62, 64, 65], "txt": [1, 4, 9, 11, 13, 15, 20, 25, 26, 28, 29, 30, 31, 32, 33, 35, 38, 40, 41, 43, 47, 51, 52, 54, 56, 64, 65], "set": [1, 4, 6, 7, 10, 12, 13, 16, 20, 25, 28, 29, 30, 35, 36, 37, 38, 40, 41, 43, 44, 46, 48, 49, 54, 56, 60, 61, 62], "up": [1, 25, 26, 28, 29, 30, 35, 38, 41, 43, 44, 46, 47, 48, 49, 61, 62], "readi": [1, 20, 38, 43, 44, 56], "refer": [1, 2, 5, 6, 7, 11, 13, 15, 18, 20, 25, 26, 27, 28, 29, 30, 32, 33, 35, 38, 40, 41, 43, 46, 47, 48, 51, 52, 54, 56, 58, 61, 62, 64], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 63], "local": [1, 9, 15, 20, 25, 46, 48, 49, 56, 60, 61, 62], "preview": 1, "what": [1, 2, 11, 15, 20, 25, 28, 29, 30, 40, 58, 64], "look": [1, 2, 4, 6, 7, 14, 20, 22, 25, 28, 29, 30, 38, 40, 41, 43, 44], "like": [1, 2, 9, 11, 20, 23, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 58, 60, 61], "publish": [1, 26, 39], "html": [1, 2, 10, 11, 13, 15, 20, 25, 27, 28, 29, 30, 31, 32, 33, 46, 60, 61, 62, 64], "gener": [1, 6, 9, 14, 15, 18, 26, 28, 29, 30, 31, 32, 33, 37, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62, 64, 65], "view": [1, 8, 24, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61, 62], "follow": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "python3": [1, 9, 10, 13, 15, 25, 29, 30], "m": [1, 15, 20, 25, 28, 29, 30, 40, 46, 48, 49, 51, 52, 60, 61, 62], "server": [1, 23, 60], "It": [1, 2, 6, 7, 9, 11, 14, 15, 20, 21, 25, 27, 28, 29, 30, 31, 32, 33, 35, 38, 39, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "print": [1, 12, 16, 20, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "serv": [1, 46, 48, 49, 60, 61, 62], "port": [1, 14, 17, 35, 36, 44, 46, 48, 49, 60, 61, 62], "8000": [1, 11, 15, 54], "open": [1, 4, 6, 7, 9, 20, 24, 26, 28, 29, 30, 39, 40, 43, 44], "browser": [1, 20, 21, 23, 46, 48, 49, 60, 61, 62], "go": [1, 7, 38, 40, 43, 46, 48, 49, 60, 61, 62, 64], "read": [2, 11, 15, 20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "code": [2, 3, 8, 10, 13, 15, 17, 20, 24, 25, 28, 29, 30, 38, 43, 44, 46, 47, 51, 52, 54, 58, 61, 62], "style": [2, 3, 24], "adjust": [2, 56, 64, 65], "design": 2, "python": [2, 9, 13, 15, 17, 25, 26, 28, 29, 30, 31, 32, 33, 38, 40, 43, 46, 48, 49, 56, 60, 61, 62, 64, 65], "recommend": [2, 6, 7, 9, 25, 35, 36, 38, 40, 41, 43, 44, 46, 61, 62], "test": [2, 4, 9, 15, 18, 24, 26, 27, 34, 35, 36, 38, 40, 41, 43, 44, 47, 48, 51, 52, 56, 64, 65], "valid": [2, 25, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "dataset": [2, 10, 11, 13, 14, 25, 26, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "lhots": [2, 9, 11, 13, 15, 24, 26, 28, 29, 30, 38, 40, 43], "readthedoc": [2, 11, 25], "io": [2, 9, 11, 13, 15, 25, 27, 28, 29, 30, 31, 32, 33, 46, 60, 61, 62, 64], "en": [2, 11, 25, 28], "latest": [2, 9, 11, 13, 23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "index": [2, 25, 27, 28, 29, 30, 31, 32, 33, 60, 61, 62], "yesno": [2, 8, 10, 11, 12, 13, 14, 15, 16, 24, 25, 42, 54, 66], "veri": [2, 3, 7, 13, 28, 29, 30, 35, 36, 40, 51, 52, 54, 61, 62], "good": [2, 7], "exampl": [2, 11, 13, 23, 24, 26, 28, 29, 30, 32, 33, 34, 44, 47, 51, 52, 54], "speech": [2, 11, 13, 14, 23, 24, 25, 27, 37, 39, 40, 54, 64, 65, 66], "pull": [2, 4, 6, 7, 9, 28, 29, 30, 31, 35, 36, 38, 40, 43, 56, 58], "380": [2, 28, 52], "show": [2, 4, 6, 7, 9, 15, 23, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "add": [2, 11, 28, 29, 30, 38, 40, 41, 61, 66], "new": [2, 3, 9, 13, 23, 25, 28, 29, 30, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 54, 60, 61, 62], "suppos": [2, 9, 61, 62], "would": [2, 11, 26, 28, 29, 30, 43, 47, 61, 62], "name": [2, 9, 10, 13, 15, 26, 28, 29, 30, 31, 38, 40, 46, 48, 49, 56, 61, 62], "foo": [2, 33, 38, 43, 46, 48, 49, 60, 61, 62], "eg": [2, 9, 10, 11, 12, 15, 16, 20, 22, 25, 26, 28, 29, 30, 31, 32, 33, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "mkdir": [2, 9, 28, 29, 38, 40, 41, 43, 47, 51, 52, 54], "p": [2, 4, 13, 20, 25, 28, 29, 40, 51, 52], "asr": [2, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 66], "touch": 2, "sh": [2, 9, 11, 25, 26, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "chmod": 2, "x": [2, 4, 20, 30, 58], "simpl": [2, 12, 14, 16, 25, 40, 56], "own": [2, 11, 35, 36, 44, 46, 56, 61, 62], "otherwis": [2, 28, 29, 30, 36, 38, 40, 43, 44, 46, 48, 49, 60, 61, 62], "librispeech": [2, 4, 6, 7, 10, 20, 22, 24, 26, 28, 29, 30, 31, 32, 33, 35, 36, 42, 43, 44, 46, 47, 48, 49, 56, 57, 58, 60, 61, 62, 66], "assum": [2, 4, 15, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 56, 60, 61, 62], "fanci": 2, "call": [2, 10, 31, 44, 56], "bar": [2, 33, 38, 43, 46, 48, 49, 60, 61, 62], "organ": 2, "wai": [2, 3, 15, 34, 46, 48, 49, 58, 60, 61, 62], "readm": [2, 38, 40, 41, 43, 47, 51, 52, 54], "md": [2, 22, 26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "asr_datamodul": [2, 9, 10, 15, 25], "pretrain": [2, 4, 6, 7, 15, 26, 28, 29, 30, 31, 33, 35, 36, 38, 40, 41, 43, 47, 51, 52, 54, 63], "For": [2, 4, 6, 7, 9, 10, 14, 20, 22, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "instanc": [2, 9, 10, 12, 16, 22, 28, 29, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "tdnn": [2, 9, 10, 12, 15, 16, 25, 39, 42, 45, 50, 53], "its": [2, 4, 20, 26, 27, 28, 29, 30, 33, 40, 48, 56], "directori": [2, 9, 11, 13, 24, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "structur": [2, 30], "descript": [2, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "contain": [2, 8, 11, 13, 14, 15, 20, 24, 26, 27, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 66], "inform": [2, 4, 6, 11, 12, 16, 25, 26, 38, 40, 41, 43, 46, 47, 48, 51, 52, 54, 58, 60, 61, 62], "g": [2, 4, 5, 6, 7, 11, 13, 20, 25, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "wer": [2, 5, 9, 12, 15, 25, 26, 35, 36, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "etc": [2, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "provid": [2, 11, 15, 23, 25, 26, 27, 28, 29, 30, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 66], "pytorch": [2, 10, 13, 20, 25, 28, 29, 30, 40], "dataload": [2, 25], "take": [2, 7, 9, 26, 44, 46, 54, 56, 61, 62, 64, 65], "input": [2, 26, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 54, 58], "checkpoint": [2, 4, 6, 7, 12, 15, 20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "save": [2, 15, 16, 25, 26, 29, 30, 32, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "dure": [2, 4, 5, 7, 10, 13, 20, 23, 26, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "stage": [2, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "": [2, 4, 6, 7, 9, 14, 15, 16, 20, 25, 26, 28, 29, 30, 31, 32, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "definit": [2, 28, 29], "neural": [2, 4, 6, 7, 35, 38, 43, 56], "network": [2, 35, 38, 40, 43, 46, 48, 49, 56, 60, 61, 62], "script": [2, 6, 7, 13, 14, 24, 25, 33, 34, 38, 40, 41, 43, 44, 47, 51, 52, 54, 56, 60], "infer": [2, 26, 28, 29, 63], "tdnn_lstm_ctc": [2, 41, 47, 52], "conformer_ctc": [2, 38, 43], "get": [2, 9, 13, 14, 15, 18, 23, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 54, 58, 60, 61, 62, 64], "feel": [2, 44, 56, 60], "result": [2, 4, 7, 9, 16, 22, 23, 26, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "everi": [2, 26, 46, 48, 49, 60, 61, 62], "kept": [2, 46, 61, 62], "self": [2, 27, 30, 58], "toler": 2, "duplic": 2, "among": [2, 25], "differ": [2, 9, 12, 18, 25, 28, 29, 30, 31, 35, 36, 37, 38, 39, 43, 44, 46, 58, 60, 61, 62, 64], "invoc": [2, 28, 29], "help": [2, 12, 14, 16, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "blob": [2, 11, 22, 25, 26, 33, 46, 48, 49, 60, 61, 62], "master": [2, 6, 9, 11, 15, 22, 25, 26, 29, 30, 32, 33, 35, 36, 40, 44, 46, 48, 49, 60, 61, 62], "transform": [2, 6, 7, 38, 43, 60], "conform": [2, 32, 39, 40, 42, 45, 46, 48, 60, 61, 62], "base": [2, 4, 7, 13, 17, 24, 30, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62], "lstm": [2, 27, 33, 34, 39, 42, 45, 50, 57, 59], "attent": [2, 30, 40, 41, 44, 58, 61, 62], "lm": [2, 4, 5, 7, 9, 11, 24, 25, 40, 46, 47, 51, 52, 54, 56, 61, 62, 66], "rescor": [2, 5, 24, 41, 47, 49, 51, 52, 54, 56], "demonstr": [2, 14, 15, 21, 23, 26, 31, 35], "consid": [2, 4, 30, 36], "colab": [2, 20, 25], "notebook": [2, 20, 25], "welcom": 3, "There": [3, 4, 15, 28, 29, 30, 31, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "mani": [3, 12, 25, 61, 62], "two": [3, 4, 11, 14, 15, 18, 20, 24, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 65], "them": [3, 5, 6, 21, 22, 23, 28, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "To": [3, 4, 5, 6, 7, 9, 11, 15, 20, 23, 25, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "document": [3, 24, 26, 27, 28, 29, 30, 31, 49, 64], "repositori": [3, 9, 28, 29, 30, 31], "recip": [3, 4, 6, 7, 9, 11, 15, 22, 24, 25, 26, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "In": [3, 4, 6, 10, 15, 23, 25, 26, 28, 29, 30, 31, 32, 33, 34, 37, 38, 40, 41, 43, 44, 47, 51, 52, 54, 58], "page": [3, 23, 32, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 66], "describ": [3, 5, 8, 9, 17, 18, 20, 21, 26, 28, 29, 31, 32, 33, 34, 38, 40, 41, 43, 46, 47, 51, 52, 61, 62, 64], "how": [3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 20, 21, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "creat": [3, 4, 6, 7, 14, 15, 18, 24, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61], "data": [3, 4, 6, 7, 8, 13, 14, 15, 16, 18, 24, 26, 28, 29, 30, 31, 32, 33, 37, 39, 56, 63], "train": [3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 18, 20, 21, 23, 24, 26, 27, 32, 33, 34, 55, 58, 63, 66], "decod": [3, 4, 8, 10, 11, 14, 15, 17, 20, 23, 24, 28, 29, 30, 33, 34, 36, 37, 56], "model": [3, 4, 6, 7, 9, 11, 12, 14, 17, 18, 20, 21, 23, 24, 25, 27, 44, 55, 58, 63, 66], "As": [4, 5, 6, 7, 28, 40, 43, 44, 56], "type": [4, 6, 7, 9, 11, 15, 20, 25, 26, 28, 29, 30, 38, 40, 43, 46, 48, 49, 54, 58, 60, 61, 62, 64], "e2": [4, 7, 25, 56], "usual": [4, 6, 7, 12, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62, 64, 65], "an": [4, 5, 6, 7, 9, 11, 13, 15, 18, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 38, 39, 40, 43, 44, 46, 49, 54, 55, 60, 61, 62, 64, 65, 66], "intern": [4, 5], "languag": [4, 7, 11, 23, 24, 38, 40, 41, 55, 64, 66], "learn": [4, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "level": [4, 5, 15, 20, 56], "corpu": [4, 6, 7, 39, 56], "real": 4, "life": 4, "scenario": 4, "often": [4, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "mismatch": [4, 36, 61], "between": [4, 7, 18, 35, 46, 61, 62], "target": [4, 23, 25, 35], "space": [4, 21, 24, 56], "problem": [4, 6, 7, 25, 44], "when": [4, 6, 9, 10, 15, 23, 28, 29, 30, 34, 40, 43, 44, 46, 48, 49, 56, 61, 62], "act": 4, "against": [4, 25], "extern": [4, 5, 6, 7], "tutori": [4, 5, 6, 7, 13, 15, 18, 20, 24, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 56, 60, 61, 62, 64, 65], "low": [4, 28, 29, 37, 64], "order": [4, 13, 17, 25, 28, 29, 30, 38, 41, 43, 47, 51, 52], "densiti": 4, "ratio": [4, 20], "allevi": 4, "effect": [4, 7, 30, 35], "improv": [4, 5, 6, 7, 35, 37, 40, 56], "perform": [4, 6, 7, 18, 27, 35, 36, 37, 40, 44, 61], "languga": 4, "integr": [4, 23, 35], "pruned_transducer_stateless7_stream": [4, 6, 7, 30, 31, 62], "stream": [4, 6, 7, 15, 17, 24, 27, 28, 29, 31, 34, 38, 43, 51, 52, 60, 66], "howev": [4, 6, 7, 26, 29, 37, 44], "easili": [4, 6, 7, 35, 38, 41, 43], "appli": [4, 6, 7, 40, 58], "other": [4, 7, 9, 13, 14, 15, 20, 26, 29, 30, 31, 35, 40, 43, 44, 46, 47, 51, 52, 54, 58, 61, 62, 66], "encount": [4, 6, 7, 10, 25, 30, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "here": [4, 6, 7, 20, 26, 28, 29, 30, 38, 40, 41, 43, 44, 47, 58, 61], "simplic": [4, 6, 7], "same": [4, 6, 7, 20, 25, 26, 28, 29, 30, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "domain": [4, 6, 7, 35, 36, 37], "gigaspeech": [4, 6, 7, 22, 32, 35, 36, 60], "first": [4, 6, 9, 10, 11, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "let": [4, 6, 7, 14, 25, 28, 29, 30, 35, 36, 38, 43, 56], "background": 4, "predecessor": 4, "dr": 4, "propos": [4, 40, 58, 62], "address": [4, 9, 15, 23, 25, 26, 28, 29, 30, 40, 46, 49, 60, 61, 62], "sourc": [4, 11, 13, 25, 26, 28, 29, 30, 38, 39, 40, 43], "acoust": [4, 61, 62], "similar": [4, 5, 36, 44, 48, 61, 62], "deriv": 4, "formula": 4, "bay": 4, "theorem": 4, "text": [4, 6, 7, 11, 16, 20, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "score": [4, 5, 7, 38, 43, 46, 61, 62], "left": [4, 28, 30, 35, 40, 61, 62], "y_u": 4, "mathit": 4, "y": [4, 20], "right": [4, 28, 40, 58, 61], "log": [4, 9, 10, 12, 15, 16, 25, 28, 29, 30, 35, 47, 51, 52, 54, 64, 65], "y_": 4, "u": [4, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 54], "lambda_1": 4, "p_": 4, "lambda_2": 4, "where": [4, 9, 10, 61], "weight": [4, 15, 38, 41, 43, 48, 49, 56, 60], "respect": 4, "onli": [4, 6, 8, 11, 13, 14, 15, 20, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "compar": [4, 17, 28, 29, 30, 36, 61], "shallow": [4, 5, 24, 56], "fusion": [4, 5, 24, 56], "subtract": [4, 5], "work": [4, 9, 13, 15, 28, 29, 30, 43], "treat": [4, 29, 30], "predictor": 4, "joiner": [4, 28, 29, 30, 31, 33, 35, 36, 40, 46, 60, 61, 62], "weak": 4, "captur": 4, "therefor": [4, 10], "n": [4, 5, 6, 11, 20, 25, 38, 44, 46, 48, 49, 51, 52, 60, 61, 62], "gram": [4, 6, 25, 38, 40, 41, 46, 47, 49, 51, 52, 61, 62], "approxim": [4, 5], "ilm": 4, "lead": [4, 7, 12], "rnnt": [4, 46, 61, 62], "bi": [4, 6], "addit": [4, 37], "estim": 4, "li": 4, "choic": 4, "accord": [4, 56], "origin": [4, 5, 35, 36, 37], "paper": [4, 5, 35, 44, 46, 60, 61, 62, 64, 65], "achiev": [4, 6, 7, 35, 36, 56, 58], "both": [4, 36, 46, 48, 49, 58, 60, 61, 62], "intra": 4, "cross": 4, "much": [4, 28, 29, 35, 36], "faster": [4, 6, 35, 64], "evalu": 4, "now": [4, 6, 9, 13, 15, 20, 25, 28, 29, 30, 38, 43, 44, 46, 47, 48, 49, 51, 52, 56, 60, 61, 62], "illustr": [4, 6, 7, 35, 36, 56], "purpos": [4, 6, 7, 28, 29, 35, 36, 56], "from": [4, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "link": [4, 6, 7, 22, 25, 26, 27, 46, 48, 49, 60, 61, 62, 64, 65], "scratch": [4, 6, 7, 36, 46, 48, 49, 56, 60, 61, 62, 64, 65], "prune": [4, 6, 7, 26, 30, 31, 40, 42, 44, 45, 57, 58, 59, 60, 62], "statelessx": [4, 6, 7, 42, 44, 45, 57, 58, 59], "initi": [4, 6, 7, 9, 35, 36, 38, 41], "step": [4, 6, 7, 11, 14, 20, 25, 26, 28, 29, 30, 36, 38, 40, 41, 43, 44, 46, 48, 49, 54, 56, 60, 61, 62], "download": [4, 6, 7, 8, 10, 13, 15, 20, 23, 24, 27, 34, 35, 36, 39, 44, 56, 63], "git_lfs_skip_smudg": [4, 6, 7, 28, 29, 30, 31, 35, 36, 56], "huggingfac": [4, 6, 7, 13, 22, 24, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 48, 49, 51, 52, 54, 56, 60, 64, 65], "co": [4, 6, 7, 22, 23, 25, 26, 28, 29, 30, 31, 35, 36, 38, 39, 40, 41, 43, 47, 48, 49, 51, 52, 54, 56, 60, 64, 65], "zengwei": [4, 6, 7, 28, 30, 31, 35, 36, 49, 56, 60, 64], "stateless7": [4, 6, 7, 30, 31], "2022": [4, 6, 7, 26, 28, 29, 30, 31, 40, 46, 48, 49, 60, 61], "12": [4, 6, 7, 9, 14, 20, 25, 26, 28, 29, 30, 31, 35, 38, 40, 41, 43, 46, 48, 49, 51, 54, 60, 61, 62, 64, 65], "29": [4, 6, 7, 20, 25, 30, 31, 38, 40, 41, 43, 47, 48, 51, 52], "exp": [4, 6, 7, 9, 15, 16, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "lf": [4, 6, 7, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 49, 51, 52, 54, 56], "includ": [4, 6, 7, 28, 29, 30, 31, 35, 36, 46, 48, 49, 56, 60, 61, 62], "pt": [4, 6, 7, 9, 11, 15, 20, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "ln": [4, 6, 7, 9, 15, 26, 28, 29, 30, 31, 35, 36, 38, 43, 46, 48, 49, 56, 60, 61, 62], "epoch": [4, 6, 7, 9, 12, 15, 16, 25, 26, 28, 29, 30, 31, 32, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "99": [4, 6, 7, 15, 20, 25, 28, 29, 30, 31, 35, 36], "symbol": [4, 5, 6, 7, 20, 25, 40, 46, 61, 62], "load": [4, 6, 7, 9, 15, 20, 25, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "lang_bpe_500": [4, 6, 7, 26, 28, 29, 30, 31, 32, 33, 35, 36, 43, 46, 48, 49, 56, 60, 61, 62], "bpe": [4, 5, 6, 7, 26, 28, 29, 30, 31, 33, 35, 36, 43, 46, 48, 49, 56, 60, 61, 62], "done": [4, 6, 7, 9, 13, 15, 25, 26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "via": [4, 6, 7, 14, 25, 27, 32, 33, 34, 35, 36, 56], "exp_dir": [4, 6, 7, 9, 15, 25, 28, 29, 30, 40, 43, 44, 46, 48, 49, 61, 62], "avg": [4, 6, 7, 9, 12, 15, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 40, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "averag": [4, 6, 7, 9, 12, 15, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "fals": [4, 6, 7, 9, 15, 20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 43, 44], "dir": [4, 6, 7, 20, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "max": [4, 6, 7, 25, 26, 28, 29, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62, 64, 65], "durat": [4, 6, 7, 11, 26, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "600": [4, 6, 7, 25, 26, 35, 43, 46, 48, 60, 61, 62], "chunk": [4, 6, 7, 28, 30, 31, 35, 61, 62], "len": [4, 6, 7, 20, 30, 31, 62], "32": [4, 6, 7, 20, 25, 28, 29, 30, 31, 35, 38, 40, 41, 62], "method": [4, 5, 7, 15, 23, 26, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 60, 61, 62], "modified_beam_search": [4, 5, 6, 7, 23, 40, 44, 46, 48, 60, 61, 62], "clean": [4, 9, 15, 25, 30, 35, 38, 40, 43, 44, 46, 47, 48, 49, 60, 61, 62], "beam_size_4": [4, 6, 7], "11": [4, 6, 7, 9, 10, 11, 15, 20, 25, 28, 29, 31, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "best": [4, 5, 6, 7, 28, 29, 30, 35, 36, 38, 41, 43], "7": [4, 6, 7, 9, 20, 25, 26, 27, 30, 34, 38, 41, 43, 46, 47, 51, 52, 60, 61], "93": [4, 6, 7, 15, 20], "Then": [4, 6], "necessari": [4, 44, 56], "note": [4, 5, 6, 7, 10, 11, 15, 17, 20, 26, 28, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "960": [4, 35, 36, 43, 46, 48, 49, 60, 61, 62], "hour": [4, 13, 35, 36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "ezerhouni": [4, 6, 7], "pushd": [4, 6, 7, 31], "popd": [4, 6, 7, 31], "marcoyang": [4, 6], "librispeech_bigram": [4, 6], "2gram": [4, 6], "fst": [4, 11, 17, 24, 25, 40, 54], "modified_beam_search_lm_lodr": 4, "lm_dir": [4, 6, 7, 9, 25, 43], "lm_scale": [4, 6, 7], "42": [4, 9, 15, 20, 25, 29, 35, 38, 43, 54], "lodr_scal": 4, "24": [4, 9, 10, 13, 15, 20, 25, 28, 29, 41, 47, 51, 52, 54], "modified_beam_search_lodr": [4, 5, 6], "scale": [4, 6, 7, 28, 29, 38, 43, 44, 47, 49, 51, 52], "embed": [4, 6, 7, 40, 46, 56, 60, 61, 62], "dim": [4, 6, 7, 28, 29, 30, 35, 40, 46, 56, 61], "2048": [4, 6, 7, 26, 28, 29, 30, 40, 56], "hidden": [4, 6, 7, 29, 56, 60], "num": [4, 6, 7, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62, 64, 65], "layer": [4, 6, 7, 28, 29, 30, 35, 40, 44, 46, 56, 58, 60, 61, 62], "vocab": [4, 6, 7, 43], "500": [4, 6, 7, 26, 28, 29, 30, 40, 43, 49, 60, 64, 65], "token": [4, 11, 20, 26, 28, 29, 30, 31, 32, 33, 35, 38, 40, 41, 43, 47, 51, 52, 54, 56, 64, 65], "ngram": [4, 43, 47, 51, 52], "2": [4, 6, 7, 9, 11, 13, 15, 20, 24, 26, 27, 34, 35, 36, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "extra": [4, 28, 29, 30, 40, 58, 61, 63], "argument": [4, 7, 15, 35, 36, 44, 58], "need": [4, 6, 11, 13, 14, 15, 17, 20, 23, 25, 26, 27, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62], "given": [4, 9, 11, 12, 13, 15, 20, 25, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 61, 62, 64, 65], "specifi": [4, 7, 10, 12, 15, 16, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "neg": [4, 40], "number": [4, 7, 16, 23, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "obtain": [4, 7, 38, 40, 41, 43, 47, 51, 52], "shown": [4, 7, 35], "below": [4, 7, 9, 11, 12, 13, 14, 15, 16, 20, 25, 28, 29, 30, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 64], "61": [4, 6, 20], "6": [4, 6, 7, 9, 10, 11, 15, 20, 27, 34, 38, 40, 43, 46, 47, 51, 52, 60, 65], "74": [4, 6, 20, 25, 26], "recal": 4, "lowest": [4, 12, 15, 46, 48, 49, 60, 61, 62], "77": [4, 6, 7, 20, 25, 43], "08": [4, 6, 7, 9, 15, 20, 30, 43, 47, 49, 51, 52, 54, 60], "inde": 4, "even": [4, 23, 25, 29], "better": [4, 6], "increas": [4, 6, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "8": [4, 6, 7, 9, 10, 15, 20, 25, 26, 28, 29, 30, 35, 38, 40, 43, 44, 46, 47, 48, 49, 54, 60, 61, 62], "45": [4, 6, 15, 20, 25, 28, 30, 38, 40, 43], "38": [4, 6, 20, 25, 28, 38, 40, 43, 51], "23": [4, 6, 9, 10, 11, 15, 20, 25, 28, 29, 30, 35, 38, 40, 41, 43, 51, 52, 54], "section": [5, 8, 9, 10, 18, 20, 21, 25, 26, 31, 32, 33, 34, 38, 43], "langugag": 5, "transduc": [5, 24, 26, 27, 31, 34, 35, 36, 39, 42, 44, 45, 56, 57, 58, 59], "rnn": [5, 6, 7, 24, 29, 40, 46, 48, 60, 61, 62, 66], "avail": [5, 6, 8, 15, 24, 25, 26, 28, 29, 30, 36, 37, 38, 40, 43, 47, 51, 52, 54, 60], "beam": [5, 26, 60], "search": [5, 6, 7, 22, 23, 63], "realli": [5, 38, 41, 43, 46, 48, 49, 60, 61, 62], "valu": [5, 7, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "t": [5, 13, 14, 15, 17, 20, 25, 28, 29, 30, 31, 32, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "doe": [5, 15, 17, 20, 28, 29, 30, 38, 40, 43, 54], "modified_beam_search_lm_shallow_fus": [5, 6, 7], "interpol": 5, "also": [5, 6, 7, 11, 13, 14, 15, 21, 22, 25, 26, 27, 28, 29, 30, 31, 33, 35, 38, 40, 41, 43, 46, 48, 49, 54, 56, 58, 60, 61, 62, 64], "known": 5, "bigram": 5, "backoff": 5, "modified_beam_search_lm_rescor": [5, 6], "hypothes": [5, 6], "rnnlm": [5, 6, 56], "re": [5, 6, 10, 38, 41, 43, 44, 46, 48, 49, 58, 60, 61, 62], "rank": [5, 6], "modified_beam_search_lm_rescore_lodr": [5, 6], "lodr": [5, 24, 56], "commonli": [6, 7, 38, 40, 41, 43, 47, 51, 52, 54], "approach": [6, 18, 20, 24], "incorpor": 6, "unlik": 6, "more": [6, 14, 25, 28, 29, 30, 35, 38, 43, 44, 54, 56, 58, 60, 61, 64, 65], "effici": [6, 7, 35, 46, 61, 62], "than": [6, 25, 26, 29, 35, 38, 40, 41, 43, 46, 47, 48, 49, 54, 60, 61, 62], "sinc": [6, 13, 20, 25, 28, 29, 30, 36, 44, 54, 60], "less": [6, 26, 35, 43, 47, 54, 61, 62], "comput": [6, 15, 18, 25, 26, 28, 29, 30, 38, 40, 41, 44, 46, 47, 49, 51, 52, 54, 60, 61, 62], "gpu": [6, 7, 8, 13, 14, 24, 25, 28, 29, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 51, 52, 54, 60, 61, 62], "try": [6, 10, 12, 15, 21, 23, 44, 46, 48, 49, 60, 61, 62], "might": [6, 7, 29, 30, 61, 62], "ideal": [6, 7], "mai": [6, 7, 9, 25, 28, 29, 30, 36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62, 66], "With": [6, 25], "43": [6, 9, 20, 29, 30, 43], "great": 6, "made": [6, 28], "boost": [6, 7], "tabl": [6, 17, 23, 28, 29, 30], "67": [6, 20, 25], "59": [6, 15, 20, 25, 28, 41, 43], "86": [6, 20], "fact": 6, "arpa": [6, 11, 54], "performn": 6, "depend": [6, 14, 15, 17, 25, 38, 43, 63], "kenlm": 6, "kpu": 6, "archiv": [6, 56], "zip": 6, "execut": [6, 7, 13, 28, 35, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "9": [6, 9, 20, 25, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 54, 60, 61, 62], "57": [6, 20, 25, 29, 43, 47], "slightli": 6, "63": [6, 20, 40], "04": [6, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52], "52": [6, 20, 25, 38, 43], "73": [6, 20], "mention": [6, 58], "earlier": 6, "benchmark": [6, 40], "speed": [6, 28, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "132": [6, 20], "95": [6, 20, 39], "177": [6, 25, 26, 29, 30, 40, 41, 43], "96": [6, 20, 25, 35], "210": [6, 51, 52], "262": [6, 7, 15], "62": [6, 7, 20, 25, 43, 47], "65": [6, 7, 20, 25, 28], "352": [6, 7, 43], "58": [6, 7, 10, 20, 25, 43], "488": [6, 7, 28, 29, 30], "400": [6, 9, 39], "610": 6, "870": 6, "156": [6, 15, 20], "203": [6, 15, 26, 43], "255": [6, 29, 30], "160": [6, 15, 20], "263": [6, 9, 15, 25, 29], "singl": [6, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "32g": 6, "v100": [6, 38, 40, 41, 43], "vari": 6, "word": [7, 11, 12, 15, 18, 38, 40, 41, 43, 47, 51, 52, 54, 56], "error": [7, 9, 10, 12, 13, 15, 25, 28, 29, 30, 43], "rate": [7, 12, 20, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "These": [7, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "alreadi": [7, 11, 13, 25, 26, 37], "But": [7, 28, 46, 48, 49, 60, 61, 62], "long": [7, 28, 56, 64, 65], "true": [7, 9, 15, 25, 26, 28, 29, 30, 35, 36, 38, 40, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "either": [7, 15, 23, 38, 40, 41, 43, 61, 62], "choos": [7, 23, 25, 44, 46, 48, 49, 60, 61, 62], "three": [7, 15, 28, 29, 30, 33, 38, 40, 58], "associ": 7, "dimens": [7, 35, 46, 56, 61, 62], "obviou": 7, "rel": [7, 37], "reduct": [7, 15, 25, 28, 29, 48], "around": [7, 36], "A": [7, 14, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 60, 61, 62], "few": [7, 11, 28, 29, 30, 44], "paramet": [7, 14, 26, 28, 29, 30, 32, 35, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 56, 60, 61, 62, 64, 65], "tune": [7, 24, 28, 29, 30, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62, 66], "control": [7, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "too": 7, "small": [7, 35, 36, 37, 40, 51, 52, 54], "fulli": 7, "util": [7, 9, 10, 15, 20, 25, 43], "larg": [7, 13], "domin": 7, "bad": 7, "typic": [7, 35, 38, 40, 41, 43], "activ": [7, 13, 23, 25], "path": [7, 9, 15, 23, 25, 26, 28, 29, 30, 33, 36, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "trade": 7, "off": [7, 28], "accuraci": [7, 28, 29, 37, 39], "larger": [7, 29, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "slower": 7, "built": [8, 9, 25, 64], "imag": [8, 24], "cpu": [8, 12, 13, 14, 15, 16, 17, 20, 24, 25, 26, 28, 29, 30, 32, 38, 46, 48, 49, 54, 61, 62, 64], "still": [8, 28, 29, 30, 37], "introduct": [8, 24, 57, 66], "tag": [8, 24], "cuda": [8, 10, 15, 17, 20, 24, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 60, 61, 62], "enabl": [8, 25, 44], "within": [8, 14, 21, 23, 24, 28, 29], "updat": [8, 28, 29, 30, 35], "host": [9, 26], "hub": [9, 20], "k2fsa": 9, "find": [9, 10, 16, 21, 22, 23, 26, 28, 29, 30, 33, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "dockerfil": 9, "tree": [9, 11, 32, 33, 38, 40, 41, 43, 47, 51, 52, 54, 60], "item": [9, 14, 20], "curl": 9, "registri": 9, "v2": [9, 30, 38, 43], "jq": 9, "give": [9, 11, 15, 20, 40], "someth": [9, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "torch2": [9, 13, 15], "cuda12": 9, "cuda11": [9, 10, 25], "torch1": [9, 10, 25], "cuda10": 9, "13": [9, 10, 15, 20, 25, 26, 28, 29, 30, 36, 40, 41, 43, 47, 48, 51], "releas": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 64], "torch": [9, 10, 13, 14, 20, 24, 26, 27, 34, 38, 40, 43], "select": [9, 12, 13, 14, 23, 25, 28, 29, 30, 46, 47, 51, 52, 54, 60, 61, 62], "appropri": [9, 25], "combin": [9, 12, 28, 29, 30], "visit": [9, 22, 23, 46, 48, 49, 60, 61, 62, 64, 65], "pkg": 9, "py3": [9, 10, 25], "v1": [9, 38, 41, 43, 47, 51, 52], "current": [9, 23, 28, 29, 40, 44, 58, 60, 61, 62, 64, 65, 66], "ghcr": 9, "alwai": [9, 25, 26], "sudo": [9, 38, 41], "rm": 9, "bin": [9, 13, 25, 28, 29, 30, 38, 43], "bash": 9, "start": [9, 11, 12, 14, 15, 16, 20, 23, 25, 26, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "interfac": 9, "present": [9, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "root": [9, 20, 28, 29, 30, 56], "60c947eac59c": 9, "workspac": 9, "export": [9, 10, 11, 12, 13, 14, 16, 24, 25, 37, 38, 40, 41, 43, 44, 47, 51, 52, 54, 63], "pythonpath": [9, 11, 12, 13, 15, 16, 25, 28, 29, 30], "user": [9, 10, 13, 25], "copi": [9, 20, 25, 58], "switch": [9, 25, 28, 29, 30, 38, 43, 49], "opt": 9, "conda": [9, 10], "lib": [9, 10, 15, 25, 30], "site": [9, 10, 15, 25, 30], "packag": [9, 10, 15, 25, 30, 64, 65], "__init__": [9, 10, 15, 25, 26, 28, 29, 30, 38, 40, 43], "line": [9, 10, 11, 28, 29, 30, 46, 56, 61, 62], "modul": [9, 13, 24, 28, 30, 35, 48, 61], "_k2": [9, 10, 25], "determinizeweightpushingtyp": [9, 10], "importerror": [9, 24], "libcuda": 9, "cannot": [9, 24, 28, 29, 30], "share": [9, 24, 25], "object": [9, 24, 25, 38, 40, 41, 46, 54, 60, 61], "No": [9, 13, 17, 24, 28, 29, 30, 54], "stub": 9, "list": [9, 15, 20, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52], "16": [9, 15, 20, 25, 26, 28, 29, 30, 33, 35, 38, 40, 41, 43, 46, 47, 51, 52, 54, 60, 61, 62], "second": [9, 14, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "2023": [9, 15, 25, 28, 29, 30, 35, 36, 48, 56, 65], "01": [9, 11, 15, 25, 28, 40, 41, 43, 44, 48], "02": [9, 11, 25, 26, 28, 29, 30, 35, 40, 43, 46, 52, 60, 61, 64], "06": [9, 15, 25, 26, 28, 35, 36, 41, 43, 47, 54], "info": [9, 15, 25, 26, 28, 29, 30, 35, 38, 40, 41, 43, 47, 51, 52, 54], "264": [9, 25, 30], "posixpath": [9, 15, 25, 28, 29, 30, 40, 43], "lang_dir": [9, 15, 25, 40, 43], "lang_phon": [9, 11, 15, 25, 41, 47, 51, 52, 54], "feature_dim": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 54], "search_beam": [9, 15, 25, 38, 43, 54], "20": [9, 14, 15, 20, 25, 26, 28, 30, 35, 36, 38, 40, 41, 43, 46, 47, 51, 52, 54, 56, 61], "output_beam": [9, 15, 25, 38, 43, 54], "min_active_st": [9, 15, 25, 38, 43, 54], "30": [9, 10, 15, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "max_active_st": [9, 15, 25, 38, 43, 54], "10000": [9, 15, 25, 38, 43, 54], "use_double_scor": [9, 15, 25, 38, 43, 54], "14": [9, 10, 15, 20, 25, 26, 28, 29, 32, 38, 43, 46, 47, 48, 51, 60, 61, 62], "feature_dir": [9, 15, 25, 43], "fbank": [9, 11, 15, 25, 26, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 54], "max_dur": [9, 15, 25, 43], "bucketing_sampl": [9, 15, 25, 43], "num_bucket": [9, 15, 25, 43], "concatenate_cut": [9, 15, 25, 43], "duration_factor": [9, 15, 25, 43], "gap": [9, 15, 25, 43], "on_the_fly_feat": [9, 15, 25, 43], "shuffl": [9, 15, 25, 43], "return_cut": [9, 15, 25, 43], "num_work": [9, 15, 25, 43], "env_info": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43], "sha1": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43], "4c05309499a08454997adf500b56dcc629e35ae5": [9, 25], "date": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43], "tue": [9, 25, 28, 43], "jul": [9, 15, 25], "25": [9, 15, 20, 25, 26, 28, 29, 38, 43, 46, 51, 52, 54, 61], "36": [9, 20, 25, 28, 40, 43, 44], "dev": [9, 10, 15, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "7640d663": 9, "branch": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 48], "375520d": 9, "fri": [9, 26], "28": [9, 20, 25, 28, 29, 40, 43, 47, 64], "07": [9, 25, 28, 29, 30, 38, 40, 41, 43], "hostnam": [9, 15, 25, 26, 28, 29, 30, 40], "ip": [9, 15, 25, 26, 28, 29, 30, 40], "172": 9, "17": [9, 20, 25, 26, 28, 29, 30, 38, 43, 51, 52, 60], "401": 9, "lexicon": [9, 11, 15, 18, 25, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "168": [9, 15, 20, 25, 47], "compil": [9, 15, 25, 28, 29, 38, 40, 43], "linv": [9, 11, 15, 25, 40, 43, 54], "403": [9, 47], "273": [9, 15, 25, 26, 40], "devic": [9, 15, 20, 25, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 61, 62], "406": [9, 43], "291": [9, 25], "424": 9, "218": [9, 15, 25, 29], "about": [9, 11, 12, 14, 15, 16, 20, 25, 28, 29, 30, 35, 40, 44, 46, 49, 60, 61, 62], "cut": [9, 15, 25, 43], "425": [9, 29, 43], "252": [9, 25], "504": 9, "204": [9, 25, 30, 43], "batch": [9, 15, 17, 25, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 56, 60, 61, 62], "process": [9, 15, 17, 25, 26, 28, 29, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "until": [9, 15, 25, 43, 48], "w": [9, 20, 25, 43, 51, 52], "nnpack": 9, "cpp": [9, 28, 32], "53": [9, 15, 20, 25, 30, 38, 46, 47, 52, 60, 61], "could": [9, 28, 29, 30, 35, 36, 37, 38, 41, 56], "reason": [9, 14, 26, 28, 29, 30, 36, 61], "unsupport": 9, "hardwar": 9, "687": 9, "241": [9, 25, 38], "transcript": [9, 15, 18, 25, 38, 39, 40, 41, 43, 46, 47, 51, 52, 60, 61, 62], "store": [9, 11, 15, 25, 43, 56], "recog": [9, 15, 25, 40, 43], "test_set": [9, 15, 25, 54], "688": 9, "564": [9, 15, 25], "240": [9, 15, 25, 38, 54], "ins": [9, 15, 25, 43, 54], "del": [9, 15, 20, 25, 43, 54], "sub": [9, 15, 25, 43, 54], "690": 9, "249": [9, 25, 29], "wrote": [9, 15, 25, 43], "detail": [9, 11, 15, 20, 25, 27, 31, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "stat": [9, 15, 25, 43], "err": [9, 15, 25, 40, 43], "316": [9, 25, 43], "congratul": [9, 13, 20, 25, 28, 29, 30, 38, 41, 43, 47, 51, 52, 54, 64], "finish": [9, 14, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 61, 62], "successfulli": [9, 13, 25, 28, 29, 30, 64], "collect": [10, 13, 25, 56], "post": 10, "correspond": [10, 22, 23], "solut": 10, "One": 10, "torchaudio": [10, 13, 18, 20, 24, 58], "cu111": 10, "torchvis": 10, "f": [10, 13, 15, 20, 25, 51, 52, 64], "org": [10, 13, 20, 25, 39, 40, 46, 56, 60, 61, 62], "whl": [10, 13, 25], "torch_stabl": [10, 13, 25], "throw": [10, 28, 29, 30], "while": [10, 16, 25, 28, 29, 30, 35, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62], "That": [10, 11, 14, 15, 16, 17, 28, 29, 44, 46, 60, 61, 62], "cu11": 10, "correct": 10, "traceback": 10, "most": [10, 61, 62], "recent": 10, "last": 10, "yesnoasrdatamodul": 10, "home": [10, 20, 28, 29, 38, 43], "xxx": [10, 20, 26, 28, 29, 30], "next": [10, 13, 14, 23, 25, 28, 29, 30, 43, 44, 46, 47, 48, 49, 56, 60, 61, 62], "gen": [10, 13, 14, 23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "kaldi": [10, 11, 13, 14, 17, 18, 23, 24, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "34": [10, 20, 28, 29], "datamodul": 10, "add_eo": 10, "add_so": 10, "get_text": 10, "39": [10, 20, 25, 28, 30, 40, 43, 47, 51], "tensorboard": [10, 16, 25, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "summarywrit": 10, "miniconda3": 10, "env": 10, "yyi": 10, "loosevers": 10, "uninstal": 10, "setuptool": [10, 13, 25], "yangyifan": 10, "anaconda3": 10, "dev20230112": 10, "linux": [10, 13, 14, 23, 25, 27, 28, 29, 30, 31], "x86_64": [10, 25, 28], "egg": 10, "handl": [10, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "except": [10, 26], "anoth": 10, "occur": 10, "pruned_transducer_stateless7_ctc_b": [10, 48], "104": [10, 15, 20, 25], "rais": 10, "anaconda": 10, "maco": [10, 13, 14, 23, 27, 28, 29, 30, 31], "probabl": [10, 40, 46, 48, 60, 61, 62], "variabl": [10, 12, 13, 16, 25, 28, 29, 30, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "dyld_library_path": 10, "conda_prefix": 10, "locat": [10, 16, 28], "libpython": 10, "abl": 10, "insid": [10, 33], "codna_prefix": 10, "ld_library_path": 10, "setup": [11, 14, 20, 24, 25, 28, 35, 36, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 61, 62, 64, 65], "everyth": [11, 20, 27], "tmp": [11, 12, 13, 15, 16, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "each": [11, 15, 18, 26, 28, 29, 31, 35, 38, 40, 41, 43, 46, 48, 49, 56, 58, 60, 61, 62], "exist": 11, "anyth": [11, 21, 23], "els": [11, 20], "wonder": [11, 15], "url": [11, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "varieti": 11, "folder": [11, 25, 26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "wav": [11, 15, 20, 26, 28, 29, 30, 31, 33, 38, 40, 41, 43, 46, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "scp": 11, "feat": 11, "put": [11, 13, 25, 28, 29, 48, 61], "l": [11, 20, 25, 28, 29, 30, 40, 51, 52, 54], "waves_yesno": [11, 15, 25], "tar": [11, 25, 64], "gz": [11, 25, 56], "l41": 11, "extract": [11, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "yesno_cuts_test": 11, "jsonl": [11, 26], "yesno_cuts_train": 11, "yesno_feats_test": 11, "lca": 11, "yesno_feats_train": 11, "hlg": [11, 15, 25, 47, 51, 52, 54], "l_disambig": [11, 54], "lexicon_disambig": [11, 20, 54], "manifest": [11, 25, 35, 36, 44], "yesno_recordings_test": 11, "yesno_recordings_train": 11, "yesno_supervisions_test": 11, "yesno_supervisions_train": 11, "18": [11, 20, 25, 28, 29, 30, 38, 40, 41, 43, 46, 47, 51, 52, 60, 61, 62], "thei": [11, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "idea": [11, 15, 20, 58], "examin": 11, "relat": [11, 18, 26, 35, 36, 38, 40, 43, 47, 51, 52, 54, 64, 65], "gunzip": 11, "c": [11, 17, 20, 25, 40, 41, 46, 48, 49, 54, 60, 61, 62, 64], "head": [11, 20, 25, 35, 40, 58], "output": [11, 12, 13, 15, 20, 26, 28, 29, 30, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "id": [11, 38, 41, 43, 47, 51, 52], "0_0_0_0_1_1_1_1": 11, "channel": [11, 23, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "sampling_r": 11, "num_sampl": 11, "50800": 11, "35": [11, 20, 25, 26, 28, 29, 30, 40, 43, 60], "channel_id": 11, "0_0_0_1_0_1_1_0": 11, "48880": 11, "0_0_1_0_0_1_1_0": 11, "48160": 11, "audio": [11, 20, 25, 51, 52, 64], "l300": 11, "mean": [11, 14, 15, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "field": [11, 39], "per": [11, 40, 46, 61, 62], "recording_id": 11, "NO": [11, 15, 54], "ye": [11, 15, 17, 54], "hebrew": [11, 54], "supervis": [11, 24, 37, 66], "l510": 11, "furthermor": [11, 40], "featur": [11, 17, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "compress": [11, 25], "lilcom": [11, 25], "cutset": [11, 36], "recordingset": 11, "supervisionset": 11, "featureset": 11, "num_fram": [11, 20], "635": 11, "num_featur": 11, "frame_shift": 11, "storage_typ": 11, "lilcom_chunki": 11, "storage_path": 11, "storage_kei": 11, "13000": 11, "3570": 11, "record": [11, 23, 29, 30, 38, 39, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "monocut": 11, "611": 11, "16570": 11, "12964": 11, "2929": 11, "602": 11, "32463": 11, "12936": 11, "2696": 11, "actual": [11, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "separ": [11, 31, 56], "lang": [11, 20, 25, 26, 40, 43, 49], "quit": [12, 14, 16, 37, 38, 40, 41, 43, 46, 48, 49, 56, 60, 61, 62], "cuda_visible_devic": [12, 16, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "usag": [12, 15, 16, 26, 28, 29, 30, 32, 33, 47, 51, 52, 54, 63], "one": [12, 23, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "tini": [13, 14], "well": [13, 26, 35, 54, 66], "hundr": 13, "thousand": 13, "virtualenv": [13, 25], "icefall_env": [13, 15], "interpret": 13, "usr": 13, "prefix": [13, 26], "pkg_resourc": 13, "wheel": [13, 25, 28], "remeb": 13, "continu": [13, 15, 20, 28, 29, 30, 31, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "caution": [13, 38, 43], "matter": [13, 25, 28], "torchaduio": 13, "from_wheel": [13, 15, 25], "dev20231220": 13, "china": [13, 25, 39], "\u4e2d\u56fd\u56fd\u5185\u7528\u6237": [13, 25], "\u5982\u679c\u8bbf\u95ee\u4e0d\u4e86": [13, 25], "\u8bf7\u4f7f\u7528": [13, 25], "cn": [13, 25], "anytim": 13, "modulenotfounderror": 13, "don": [13, 14, 15, 17, 20, 25, 28, 29, 30, 32, 36, 38, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "walk": [14, 20], "recognit": [14, 23, 24, 27, 28, 29, 37, 39, 40, 54, 66], "system": [14, 56], "out": [14, 44, 56], "minut": [14, 56], "sequenti": 14, "part": [14, 15, 23, 25, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "window": [14, 23, 27, 28, 29, 30, 31], "commun": 14, "appreci": 14, "virtual": 14, "curiou": 14, "quick": 14, "state_dict": [14, 24, 34, 38, 40, 41, 43, 47, 51, 52, 54], "jit": [14, 24, 27, 34, 43], "onnx": [14, 24, 26, 34, 35, 63, 65], "torchscript": [15, 27, 32, 33, 34], "trace": [15, 24, 27, 32, 34], "explain": 15, "kind": [15, 43, 46, 48, 49, 60, 61, 62], "produc": [15, 27, 46, 48, 49, 60, 61, 62], "03": [15, 25, 26, 29, 35, 40, 43, 51, 52, 60, 64], "912": [15, 26], "76": [15, 20, 25, 54], "lr": [15, 25, 35, 36, 40, 60], "weight_decai": [15, 25], "1e": [15, 25], "start_epoch": [15, 25], "best_train_loss": [15, 25, 26, 28, 29, 30], "inf": [15, 25, 26, 28, 29, 30], "best_valid_loss": [15, 25, 26, 28, 29, 30], "best_train_epoch": [15, 25, 26, 28, 29, 30], "best_valid_epoch": [15, 25, 26, 29, 30], "batch_idx_train": [15, 25, 26, 28, 29, 30], "log_interv": [15, 25, 26, 28, 29, 30], "reset_interv": [15, 25, 26, 28, 29, 30], "valid_interv": [15, 25, 26, 28, 29, 30], "beam_siz": [15, 25, 26, 40], "sum": [15, 20, 25], "913": 15, "950": 15, "971": [15, 52], "106": [15, 20, 25, 29, 43], "Not": 15, "974": 15, "111": [15, 20, 25, 43], "kei": [15, 28, 29, 30, 43], "bia": 15, "running_mean": 15, "running_var": 15, "num_batches_track": 15, "output_linear": 15, "48": [15, 20, 25, 28, 29, 35, 38, 40], "089": 15, "090": 15, "ad79f1c699c684de9785ed6ca5edb805a41f78c3": 15, "wed": [15, 25, 28, 38, 40, 43], "26": [15, 20, 25, 28, 29, 30, 40, 43, 52], "09": [15, 26, 29, 38, 40, 41, 43, 60], "aa073f6": 15, "none": [15, 20, 25, 38, 43], "9a47c08": 15, "mon": [15, 29, 30], "aug": [15, 44], "50": [15, 20, 25, 26, 28, 29, 30, 43, 46, 51, 60, 61, 62], "privat": 15, "fangjun": [15, 19, 25, 26, 28, 29, 30, 40, 43], "macbook": 15, "pro": [15, 38, 43], "127": [15, 20, 25, 28, 29, 54], "092": 15, "103": [15, 20], "272": 15, "109": [15, 20, 25, 38, 43], "112": [15, 20, 28, 29, 30], "115": [15, 20, 28, 29, 38, 43], "253": 15, "386": 15, "556": 15, "557": 15, "558": 15, "248": [15, 40], "559": 15, "315": [15, 28, 38, 40, 41, 43, 47], "ident": [15, 20], "kaldifeat": 15, "csukuangfj": [15, 25, 26, 28, 29, 31, 38, 40, 41, 43, 47, 51, 52, 54, 60, 64], "dev20231221": 15, "0_0_0_1_0_0_0_1": [15, 54], "0_0_1_0_0_0_1_0": [15, 54], "19": [15, 20, 26, 28, 29, 30, 35, 36, 38, 43, 47, 51, 52], "208": [15, 43], "136": [15, 20, 43], "num_class": [15, 38, 43, 54], "sample_r": [15, 20, 26, 38, 40, 43, 54], "words_fil": [15, 38, 43, 54], "sound_fil": [15, 26, 38, 40, 43, 54], "142": [15, 20, 28, 38, 41, 43], "144": [15, 20, 43], "212": 15, "213": [15, 54], "construct": [15, 20, 26, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 54], "170": [15, 47], "sound": [15, 26, 28, 29, 30, 33, 34, 38, 40, 41, 43, 47, 51, 52, 54], "224": 15, "176": [15, 28, 40, 43], "304": [15, 29], "214": [15, 40, 43], "47": [15, 20, 25, 28, 29, 30, 36, 38, 43], "44": [15, 20, 25, 28, 29, 35, 43, 51, 52], "666": 15, "667": 15, "670": 15, "677": [15, 28], "100": [15, 20, 25, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "843": 15, "cpu_jit": [15, 32, 38, 43, 46, 48, 49, 61, 62], "confus": [15, 32], "move": [15, 32, 46, 48, 49, 61, 62], "map_loc": 15, "resid": 15, "default": [15, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "jit_pretrain": [15, 33, 48, 49, 60], "nn": [15, 40, 46, 48, 49, 60, 61, 62], "56": [15, 20, 25, 28, 29, 43, 51], "00": [15, 25, 28, 38, 40, 41, 43, 47, 51, 52, 54], "603": 15, "121": [15, 20, 47], "nn_model": [15, 38, 43], "129": [15, 20, 41], "640": [15, 25, 30], "134": [15, 20, 38], "641": 15, "138": [15, 20, 38, 40], "148": [15, 20, 35], "642": 15, "154": [15, 20, 41], "727": 15, "190": [15, 47], "192": [15, 30, 35, 43], "export_onnx": 15, "onnxruntim": [15, 31], "888": [15, 38], "83": [15, 20, 43, 47], "892": 15, "diagnost": 15, "verbos": 15, "warn": 15, "21": [15, 20, 25, 26, 28, 35, 38, 40, 43, 51, 52], "047": [15, 40], "meta_data": 15, "model_typ": 15, "model_author": 15, "comment": 15, "non": [15, 24, 43, 58, 61, 66], "vocab_s": [15, 26, 28, 29, 30, 40], "049": 15, "140": [15, 20, 25, 41], "int8": [15, 27, 34, 65], "quantiz": [15, 27, 34, 44], "075": 15, "onnx_quant": 15, "538": [15, 43], "tensor": [15, 25, 29, 30, 38, 40, 41, 43, 46, 54, 60, 61], "transpose_1_output_0": 15, "081": 15, "151": [15, 20, 28], "float32": [15, 28, 29, 30], "onnx_pretrain": [15, 31], "260": [15, 30, 43], "166": [15, 20], "171": [15, 25, 41, 43, 51, 52], "173": 15, "267": [15, 29, 40, 51, 52], "270": 15, "180": [15, 29, 38, 43], "279": [15, 43], "196": 15, "318": [15, 28, 29], "232": 15, "234": [15, 43], "deploi": [15, 31, 38, 43], "sherpa": [15, 23, 27, 32, 33, 34, 60, 63], "framework": [15, 23, 46, 61], "_": [15, 20, 44], "ncnn": [15, 24, 34], "forc": [17, 24], "align": [17, 24, 63], "instead": [17, 30, 40, 61], "support": [17, 20, 25, 27, 28, 29, 30, 38, 40, 43, 46, 48, 49, 58, 60, 61, 62, 64, 65], "api": [17, 18, 20], "ctc": [18, 20, 39, 42, 45, 49, 50, 53], "loss": [18, 20, 25, 28, 29, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "log_prob": [18, 46, 61, 62], "token2id": 18, "id2token": 18, "word2id": 18, "id2word": 18, "convert": [18, 25, 28, 29, 30, 43], "graph": [18, 38, 41, 43, 46, 47, 51, 52, 61, 62], "segment": [18, 25], "summari": 18, "todo": 19, "through": [20, 39], "sure": [20, 28, 29, 30], "NOT": [20, 38, 40, 43, 54], "wave": [20, 26, 28, 29, 30, 38, 43], "speech_fil": 20, "download_asset": 20, "asset": 20, "lab41": 20, "sri": 20, "voic": 20, "src": [20, 28, 30], "sp0307": 20, "ch127535": 20, "sg0042": 20, "waveform": 20, "sr": 20, "had": [20, 26, 43, 47], "curios": 20, "besid": 20, "me": 20, "moment": [20, 23], "split": [20, 44], "shape": [20, 25, 30], "assert": [20, 30], "ndim": 20, "16000": [20, 26, 38, 40, 41, 43, 47, 48, 51, 52], "cach": [20, 25, 30], "filenam": [20, 25, 28, 29, 30, 31, 32, 33, 48, 49, 60, 62, 64, 65], "content": [20, 28, 29, 30], "element": [20, 30], "bundl": [20, 25], "pipelin": 20, "mms_fa": 20, "is_avail": 20, "get_model": 20, "with_star": 20, "inference_mod": 20, "emiss": 20, "size": [20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "169": [20, 43, 51, 52], "get_dict": 20, "star": [20, 25, 28, 29, 30], "ep": 20, "dict": [20, 26, 30], "enumer": [20, 25], "prepare_lang": 20, "add_disambig_symbol": 20, "max_disambig_id": 20, "encod": [20, 27, 31, 33, 34, 35, 38, 40, 41, 43, 46, 47, 48, 54, 58, 60, 61, 62], "utf": 20, "join": 20, "k": [20, 30, 46, 51, 52, 60, 61, 62], "rang": 20, "o": [20, 25], "b": [20, 40, 43, 51, 52], "d": [20, 51, 52, 56], "h": [20, 25], "15": [20, 25, 26, 28, 29, 30, 35, 36, 40, 41, 43, 51, 54, 56], "v": [20, 28, 29, 30, 43, 51, 52], "j": [20, 28, 29, 38, 43], "z": [20, 51, 52], "q": 20, "27": [20, 25, 28, 29, 30, 35, 36, 38, 40, 47, 52], "charact": [20, 40], "unit": [20, 40], "prepare_lang_fst": 20, "hl": 20, "rw": [20, 28, 29, 30], "13k": 20, "jun": 20, "7k": 20, "kaldi_decod": 20, "decodablectc": 20, "fasterdecod": 20, "fasterdecoderopt": 20, "kaldifst": 20, "def": 20, "force_align": 20, "stdvectorfst": 20, "contigu": 20, "numpi": [20, 25], "decoder_opt": 20, "max_act": 20, "3000": [20, 26, 28, 29, 30], "reached_fin": 20, "return": 20, "ok": 20, "best_path": 20, "get_best_path": 20, "isymbols_out": 20, "osymbols_out": 20, "total_weight": 20, "get_linear_symbol_sequ": 20, "linear": [20, 28, 29, 40], "sequenc": [20, 61, 62], "increment": [20, 28, 29, 30], "main": [20, 25, 38, 43, 58], "ctc_forced_alignment_api_tutori": 20, "frame": [20, 25, 35, 40, 46, 48, 61, 62], "eas": [20, 28, 29, 30], "31": [20, 28, 29, 30, 35, 43], "33": [20, 25, 28, 29, 38, 39, 40, 43, 51], "37": [20, 29, 38, 40, 43, 51], "40": [20, 25, 28, 29, 30, 41, 43, 47, 51, 52], "41": [20, 25, 28, 30, 38, 40, 51, 54], "46": [20, 25, 29, 38, 43], "49": [20, 25, 28, 29, 43, 52, 54], "51": [20, 25, 28, 38, 43, 54], "54": [20, 25, 29, 30, 43, 47, 51, 52], "55": [20, 25, 28, 41, 43, 51], "60": 20, "64": [20, 25, 26, 28, 35, 40, 61], "66": [20, 25, 29, 36], "68": [20, 25, 43], "69": [20, 30], "70": [20, 25], "71": [20, 25, 43, 47], "72": [20, 40, 43], "75": [20, 25, 28], "78": 20, "79": [20, 25], "80": [20, 26, 28, 29, 30, 38, 40, 43], "81": 20, "82": 20, "84": [20, 29, 38], "85": 20, "87": [20, 25, 28], "88": [20, 28, 40], "89": [20, 25, 38], "90": [20, 25, 28], "91": [20, 25], "92": [20, 25, 43], "94": 20, "97": [20, 25, 28, 38], "98": [20, 38], "101": [20, 29], "102": [20, 30, 38], "105": [20, 43], "107": [20, 29, 47], "108": 20, "110": [20, 43], "113": [20, 40, 43], "114": 20, "116": 20, "117": [20, 43], "118": [20, 25, 43], "119": [20, 54], "120": 20, "122": [20, 43], "123": 20, "124": [20, 25, 38, 43], "125": [20, 43, 54], "126": [20, 43], "128": [20, 35, 43], "130": 20, "131": [20, 38, 43], "133": [20, 30], "135": [20, 43, 54], "137": 20, "139": [20, 54], "141": [20, 28], "143": [20, 54], "145": 20, "146": [20, 25], "147": [20, 29, 30], "149": [20, 25, 28, 43], "150": [20, 38, 43], "152": 20, "153": [20, 43, 54], "155": 20, "157": [20, 25], "158": [20, 29], "159": [20, 29, 43, 54], "161": [20, 41, 43], "162": [20, 43], "163": [20, 40, 43], "164": 20, "165": [20, 38, 43], "167": [20, 25], "merg": 20, "merge_token": 20, "token_span": 20, "span": 20, "end": [20, 40, 46, 48, 49, 54, 60, 61, 62, 64, 65], "unflatten": 20, "list_": 20, "length": [20, 28, 30, 40, 56, 61, 62], "ret": 20, "append": 20, "word_span": 20, "tokenspan": 20, "preview_word": 20, "x0": 20, "int": [20, 38, 43], "x1": 20, "3f": 20, "sec": 20, "ipython": 20, "displai": [20, 38, 40, 41, 43], "along": 20, "stamp": [20, 40], "644": 20, "664": 20, "704": [20, 25, 38, 51], "845": 20, "885": 20, "026": [20, 30], "086": 20, "790": 20, "871": 20, "314": [20, 25], "334": 20, "414": 20, "495": [20, 25], "575": 20, "595": [20, 29], "756": 20, "837": 20, "repost": 20, "whole": [20, 35, 36, 43, 47, 51, 52, 61, 62], "youtub": [21, 24, 43, 44, 46, 47, 48, 49, 60, 61, 62], "video": [21, 24, 43, 44, 46, 47, 48, 49, 60, 61, 62], "upload": [22, 23, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "specif": [22, 31, 40], "aishel": [22, 24, 38, 40, 41, 42, 66], "wenetspeech": [22, 32], "ipad": 23, "phone": 23, "screenshot": [23, 38, 40, 41, 43, 44, 46, 54, 60, 61], "chines": [23, 39, 40], "english": [23, 36, 54, 60], "greedi": 23, "click": [23, 25, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "button": 23, "submit": 23, "wait": 23, "bottom": [23, 46, 48, 49, 60, 61, 62], "subscrib": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "nadira": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "povei": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "www": [23, 25, 39, 43, 44, 46, 47, 48, 49, 56, 60, 61, 62], "uc_vaumpkminz1pnkfxan9mw": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "dummi": [24, 43], "toolkit": 24, "cudnn": 24, "docker": [24, 25], "frequent": 24, "ask": [24, 64], "question": 24, "faq": 24, "oserror": 24, "libtorch_hip": 24, "attributeerror": 24, "distutil": 24, "attribut": [24, 30, 43], "libpython3": 24, "timit": [24, 42, 51, 52, 66], "tt": [24, 64, 65, 66], "vit": [24, 63, 66], "ljspeech": [24, 63, 66], "vctk": [24, 63, 66], "fine": [24, 44, 66], "finetun": [24, 37, 66], "zipform": [24, 27, 31, 34, 37, 42, 45, 56, 57, 59, 66], "adapt": [24, 37, 66], "contribut": 24, "guid": 25, "suggest": [25, 36, 46, 48, 49, 60, 61, 62], "strongli": 25, "point": [25, 26, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "sever": [25, 26, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "just": [25, 28, 29, 30, 56, 58], "kuangfangjun": [25, 28, 29, 30], "cpython3": 25, "final": [25, 26, 28, 29, 43, 47], "9422m": 25, "creator": 25, "cpython3posix": 25, "dest": 25, "fj": [25, 26, 28, 29, 30, 40, 43], "clear": 25, "no_vcs_ignor": 25, "global": 25, "seeder": 25, "fromappdata": 25, "app_data_dir": 25, "ad": [25, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 58, 60, 61, 62], "seed": 25, "bashactiv": 25, "cshellactiv": 25, "fishactiv": 25, "nushellactiv": 25, "powershellactiv": 25, "pythonactiv": 25, "determin": 25, "nvidia": [25, 38, 40, 41, 43], "smi": 25, "510": 25, "driver": 25, "greater": 25, "our": [25, 28, 29, 30, 32, 33, 43, 44, 46, 58, 61, 62], "case": [25, 26, 28, 29, 30, 37, 46, 48, 49, 60, 61, 62], "verifi": 25, "nvcc": 25, "copyright": 25, "2005": 25, "2019": 25, "corpor": 25, "wed_oct_23_19": 25, "38_pdt_2019": 25, "v10": 25, "cu116": 25, "compat": 25, "stabl": 25, "matrix": 25, "2bcu116": 25, "cp38": 25, "linux_x86_64": 25, "1983": 25, "mb": [25, 28, 29, 30], "________________________________________": 25, "gb": [25, 40], "764": 25, "kb": [25, 28, 29, 30, 51, 52], "eta": 25, "satisfi": 25, "extens": 25, "__version__": 25, "dev20230725": 25, "pypi": 25, "tuna": 25, "tsinghua": 25, "edu": 25, "resolv": 25, "ubuntu": [25, 28, 29, 30], "2bcuda11": 25, "manylinux_2_17_x86_64": 25, "manylinux2014_x86_64": 25, "graphviz": 25, "de": [25, 26, 28, 29, 30, 40], "5e": 25, "fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa": 25, "cento": 25, "2009": 25, "core": 25, "cmake": [25, 28, 29, 38, 43], "gcc": 25, "cmake_cuda_flag": 25, "wno": 25, "deprec": [25, 40], "lineinfo": 25, "expt": 25, "extend": 25, "lambda": 25, "use_fast_math": 25, "xptxa": 25, "gencod": 25, "arch": 25, "compute_35": 25, "sm_35": 25, "compute_50": 25, "sm_50": 25, "compute_60": 25, "sm_60": 25, "compute_61": 25, "sm_61": 25, "compute_70": 25, "sm_70": 25, "compute_75": 25, "sm_75": 25, "compute_80": 25, "sm_80": 25, "compute_86": 25, "sm_86": 25, "donnx_namespac": 25, "onnx_c2": 25, "compute_52": 25, "sm_52": 25, "xcudaf": 25, "diag_suppress": 25, "cc_clobber_ignor": 25, "integer_sign_chang": 25, "useless_using_declar": 25, "set_but_not_us": 25, "field_without_dll_interfac": 25, "base_class_has_different_dll_interfac": 25, "dll_interface_conflict_none_assum": 25, "dll_interface_conflict_dllexport_assum": 25, "implicit_return_from_non_void_funct": 25, "unsigned_compare_with_zero": 25, "declared_but_not_referenc": 25, "bad_friend_decl": 25, "relax": 25, "constexpr": 25, "d_glibcxx_use_cxx11_abi": 25, "option": [25, 27, 31, 34, 40, 44, 47, 51, 52, 54], "wall": 25, "strict": [25, 30, 39], "overflow": 25, "unknown": 25, "pragma": 25, "cmake_cxx_flag": 25, "unus": 25, "nvtx": 25, "disabl": [25, 26, 28, 29], "debug": 25, "sync": 25, "kernel": [25, 28, 30, 35, 40], "memori": [25, 28, 35, 38, 40, 43, 58], "alloc": 25, "214748364800": 25, "byte": [25, 28, 29, 30], "200": [25, 26, 28, 29, 30, 38, 43, 44, 51, 52, 54], "abort": 25, "__file__": 25, "cpython": [25, 28], "gnu": [25, 28], "req": 25, "vq12fd5i": 25, "filter": 25, "quiet": [25, 39], "7640d663469b22cd0b36f3246ee9b849cd25e3b7": 25, "metadata": [25, 51, 52], "pyproject": 25, "toml": 25, "cytoolz": 25, "3b": 25, "a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f": 25, "pyyaml": 25, "c8": 25, "6b": 25, "6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b": 25, "ma": 25, "nylinux_2_17_x86_64": 25, "736": 25, "dataclass": 25, "2f": 25, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 25, "dev0": 25, "7640d66": 25, "a8": 25, "df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4": 25, "linux_2_17_x86_64": 25, "tqdm": 25, "e6": 25, "a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97": 25, "audioread": 25, "5d": 25, "cb": 25, "82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923": 25, "377": 25, "tabul": 25, "4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854": 25, "1a": 25, "e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f": 25, "ab": [25, 46, 60, 61, 62], "c3": 25, "57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121": 25, "intervaltre": 25, "fb": 25, "396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb": 25, "soundfil": 25, "bd": 25, "0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c": 25, "py2": 25, "toolz": 25, "7f": 25, "5c": 25, "922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9": 25, "sortedcontain": 25, "9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621c": 25, "cffi": 25, "b7": 25, "8b": 25, "06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79": 25, "442": 25, "pycpars": 25, "d5": 25, "5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53": 25, "687627": 25, "sha256": 25, "cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a": 25, "ephem": 25, "wwtk90_m": 25, "7a": 25, "8e": 25, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 25, "23704": 25, "5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2": 25, "9c": 25, "f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39": 25, "26098": 25, "2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9": 25, "f3": 25, "ed": 25, "2b": 25, "c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271": 25, "remot": 25, "12942": 25, "count": 25, "total": [25, 29, 30, 35, 38, 40, 41, 43, 44, 46, 47, 54, 60, 61], "delta": 25, "reus": 25, "pack": [25, 56, 61, 62], "12875": 25, "receiv": 25, "mib": 25, "8835": 25, "dl_dir": [25, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "___________________________________________________": 25, "70m": 25, "1mb": 25, "718": 25, "compute_fbank_yesno": 25, "_______________________________________________________________________________": 25, "82it": 25, "778": 25, "______________________________________________________________________________": 25, "256": [25, 30, 35, 51, 52], "92it": 25, "project": 25, "kaldilm": 25, "csrc": [25, 43], "arpa_file_pars": 25, "cc": 25, "void": 25, "arpafilepars": 25, "std": 25, "istream": 25, "275": [25, 38], "compile_hlg": 25, "276": 25, "309": 25, "ctc_topo": 25, "max_token_id": 25, "310": 25, "intersect": [25, 46, 61, 62], "323": 25, "lg": [25, 46, 49, 61, 62], "connect": [25, 26, 35, 43, 46, 47, 60, 61, 62], "class": [25, 43], "341": 25, "rag": 25, "raggedtensor": 25, "remov": [25, 38, 40, 41, 43, 47, 51, 52], "disambigu": 25, "354": 25, "remove_epsilon": 25, "445": 25, "arc": 25, "compos": 25, "446": 25, "447": 25, "fault": 25, "dump": 25, "protocol_buffers_python_implement": 25, "674": 25, "interest": [25, 44, 46, 48, 49, 60, 61, 62], "936": 25, "481": 25, "482": 25, "world_siz": [25, 44], "master_port": 25, "12354": 25, "num_epoch": 25, "3fb0a43": 25, "thu": [25, 26, 28, 29, 30, 40, 43, 47], "05": [25, 26, 28, 29, 35, 36, 38, 40, 41, 43, 52, 56, 65], "74279": [25, 26, 28, 29, 30, 40], "1220091118": 25, "57c4d55446": 25, "sph26": 25, "941": 25, "949": 25, "965": [25, 38], "244": 25, "967": 25, "199": [25, 43, 47], "singlecutsampl": 25, "205": [25, 43], "968": 25, "565": [25, 43], "422": 25, "065": 25, "over": [25, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "2436": 25, "tot_loss": 25, "681": [25, 28], "4561": 25, "2828": 25, "7076": 25, "22192": 25, "444": 25, "9002": 25, "18067": 25, "011": 25, "2555": 25, "2695": 25, "484": 25, "34971": 25, "331": [25, 28, 29, 43, 47], "4688": 25, "368": 25, "633": 25, "2532": 25, "242": [25, 38, 43], "1139": 25, "1592": 25, "522": [25, 43], "1627": 25, "209": [25, 47], "07055": 25, "1175": 25, "07091": 25, "847": 25, "07731": 25, "427": [25, 29, 43], "04391": 25, "05341": 25, "884": 25, "04384": 25, "387": [25, 52], "03458": 25, "04616": 25, "707": [25, 38, 43], "03379": 25, "758": [25, 43], "433": [25, 43], "01054": 25, "980": [25, 43], "009014": 25, "009974": 25, "489": [25, 38], "01085": 25, "258": [25, 51, 52], "01172": 25, "01055": 25, "621": [25, 54], "01074": 25, "699": 25, "866": 25, "01044": 25, "844": 25, "008942": 25, "221": [25, 43], "01082": 25, "970": [25, 43], "01169": 25, "247": 25, "01073": 25, "326": [25, 29], "555": 25, "840": 25, "841": 25, "855": 25, "868": 25, "882": 25, "883": 25, "701": 25, "702": [25, 43], "fun": [25, 28, 29], "variou": [25, 31, 34, 66], "period": [26, 28], "disk": 26, "optim": [26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "resum": [26, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "strip": 26, "reduc": [26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "pruned_transducer_stateless3": [26, 32, 58], "almost": [26, 46, 58, 61, 62], "stateless3": [26, 28], "repo": [26, 31], "those": 26, "iter": [26, 28, 29, 30, 33, 46, 48, 49, 60, 61, 62], "1224000": 26, "greedy_search": [26, 35, 36, 40, 46, 48, 60, 61, 62], "test_wav": [26, 28, 29, 30, 31, 38, 40, 41, 43, 47, 51, 52, 54], "1089": [26, 28, 29, 30, 31, 43, 47], "134686": [26, 28, 29, 30, 31, 43, 47], "0001": [26, 28, 29, 30, 31, 43, 47], "1221": [26, 28, 29, 43, 47], "135766": [26, 28, 29, 43, 47], "0002": [26, 28, 29, 43, 47], "multipl": [26, 38, 40, 41, 43, 47, 51, 52, 54], "Its": [26, 28, 29, 30, 43], "233": [26, 28, 29], "265": 26, "subsampling_factor": [26, 29, 30, 38, 40, 43], "encoder_dim": [26, 28, 29, 30], "512": [26, 28, 29, 30, 35, 38, 40, 43], "nhead": [26, 28, 30, 38, 40, 43, 46, 61], "dim_feedforward": [26, 28, 29, 40], "num_encoder_lay": [26, 28, 29, 30, 40], "decoder_dim": [26, 28, 29, 30], "joiner_dim": [26, 28, 29, 30], "model_warm_step": [26, 28, 29], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 26, "oct": [26, 43], "miss": [26, 28, 29, 30, 40, 43], "cu102": [26, 28, 29, 30], "1013": 26, "c39cba5": 26, "dirti": [26, 28, 29, 38, 43], "ceph": [26, 38, 40, 43], "0324160024": 26, "65bfd8b584": 26, "jjlbn": 26, "bpe_model": [26, 28, 29, 30, 43], "max_context": 26, "max_stat": 26, "context_s": [26, 28, 29, 30, 40], "max_sym_per_fram": [26, 40], "simulate_stream": 26, "decode_chunk_s": 26, "left_context": 26, "dynamic_chunk_train": 26, "causal_convolut": 26, "short_chunk_s": [26, 30, 61, 62], "num_left_chunk": [26, 30], "blank_id": [26, 28, 29, 30, 40], "unk_id": 26, "271": [26, 29], "612": 26, "458": 26, "giga": [26, 29, 60], "623": 26, "277": 26, "78648040": 26, "951": [26, 43], "285": [26, 40, 43], "952": 26, "295": [26, 38, 40, 41, 43], "957": 26, "301": [26, 43], "700": 26, "329": [26, 29, 43], "388": 26, "earli": [26, 28, 29, 30, 43, 47], "nightfal": [26, 28, 29, 30, 43, 47], "THE": [26, 28, 29, 30, 43, 47], "yellow": [26, 28, 29, 30, 43, 47], "lamp": [26, 28, 29, 30, 43, 47], "light": [26, 28, 29, 30, 43, 47], "AND": [26, 28, 29, 30, 43, 47], "THERE": [26, 28, 29, 30, 43, 47], "squalid": [26, 28, 29, 30, 43, 47], "quarter": [26, 28, 29, 30, 43, 47], "OF": [26, 28, 29, 30, 43, 47], "brothel": [26, 28, 29, 30, 43, 47], "god": [26, 43, 47], "AS": [26, 43, 47], "direct": [26, 43, 47], "consequ": [26, 43, 47], "sin": [26, 43, 47], "man": [26, 43, 47], "punish": [26, 43, 47], "her": [26, 43, 47], "love": [26, 43, 47], "child": [26, 43, 47], "whose": [26, 40, 43, 47], "ON": [26, 28, 43, 47], "THAT": [26, 43, 47], "dishonor": [26, 43, 47], "bosom": [26, 43, 47], "TO": [26, 43, 47], "parent": [26, 43, 47], "forev": [26, 43, 47], "WITH": [26, 43, 47], "race": [26, 43, 47], "descent": [26, 43, 47], "mortal": [26, 43, 47], "BE": [26, 43, 47], "bless": [26, 43, 47], "soul": [26, 43, 47], "IN": [26, 43, 47], "heaven": [26, 43, 47], "yet": [26, 28, 29, 43, 47], "THESE": [26, 43, 47], "thought": [26, 43, 47], "affect": [26, 43, 47], "hester": [26, 43, 47], "prynn": [26, 43, 47], "hope": [26, 39, 43, 47], "apprehens": [26, 43, 47], "390": 26, "down": [26, 38, 43, 46, 48, 49, 60, 61, 62], "reproduc": [26, 43], "9999": [26, 48, 49, 60], "symlink": 26, "pass": [26, 30, 38, 40, 41, 43, 46, 48, 49, 58, 60, 61, 62], "convemform": [27, 34, 58], "platform": [27, 31], "android": [27, 28, 29, 30, 31, 64], "raspberri": [27, 31], "pi": [27, 31], "\u7231\u82af\u6d3e": 27, "maix": 27, "iii": 27, "axera": 27, "rv1126": 27, "static": 27, "binari": [27, 28, 29, 30, 38, 40, 41, 43, 46, 54, 60, 61, 64], "pnnx": [27, 34], "conv": [28, 29], "emform": [28, 29, 32], "stateless2": [28, 29, 60], "pretrained_model": [28, 29, 30], "online_transduc": 28, "jit_xxx": [28, 29, 30], "anywher": [28, 29], "submodul": 28, "recurs": 28, "init": 28, "dcmake_build_typ": [28, 38, 43], "dncnn_python": 28, "dncnn_build_benchmark": 28, "dncnn_build_exampl": 28, "dncnn_build_tool": 28, "j4": 28, "pwd": 28, "compon": [28, 58], "ncnn2int8": [28, 29], "am": 28, "sai": [28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "later": [28, 29, 30, 38, 41, 43, 46, 47, 48, 49, 51, 52, 60, 61, 62], "termin": 28, "tencent": [28, 29], "modif": [28, 40], "offici": 28, "synchron": 28, "renam": [28, 29, 30], "conv_emformer_transducer_stateless2": [28, 58], "cnn": [28, 30, 35], "context": [28, 35, 40, 46, 58, 60, 61, 62], "configur": [28, 30, 40, 44, 47, 51, 52, 54, 64, 65], "accordingli": [28, 29, 30], "yourself": [28, 29, 30, 44, 61, 62], "220": [28, 40, 41, 43], "229": [28, 38], "best_v": 28, "alid_epoch": 28, "subsampl": [28, 61, 62], "ing_factor": 28, "a34171ed85605b0926eebbd0463d059431f4f74a": 28, "dec": 28, "ver": 28, "ion": 28, "530e8a1": 28, "op": 28, "1220120619": [28, 29, 30], "7695ff496b": [28, 29, 30], "s9n4w": [28, 29, 30], "icefa": 28, "ll": 28, "transdu": 28, "cer": 28, "use_averaged_model": [28, 29, 30], "cnn_module_kernel": [28, 30], "left_context_length": 28, "chunk_length": 28, "right_context_length": 28, "memory_s": 28, "231": [28, 29, 30], "053": 28, "022": 28, "708": [28, 38, 40, 43, 54], "75490012": 28, "320": [28, 40], "682": 28, "lh": [28, 29, 30], "289m": 28, "jan": [28, 29, 30], "289": 28, "roughli": [28, 29, 30], "equal": [28, 29, 30, 61, 62], "1024": [28, 29, 30, 35, 60], "287": [28, 54], "1010k": [28, 29], "decoder_jit_trac": [28, 29, 30, 33, 60, 62], "283m": 28, "encoder_jit_trac": [28, 29, 30, 33, 60, 62], "0m": [28, 29], "joiner_jit_trac": [28, 29, 30, 33, 60, 62], "found": [28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "param": [28, 29, 30], "503k": [28, 29], "437": [28, 29, 30], "142m": 28, "79k": 28, "5m": [28, 29], "architectur": [28, 29, 30, 60], "editor": [28, 29, 30], "283": [28, 30], "1010": [28, 29], "503": [28, 29], "convers": [28, 29, 30], "half": [28, 29, 30, 46, 61, 62], "float16": [28, 29, 30], "occupi": [28, 29, 30], "twice": [28, 29, 30], "smaller": [28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "fp16": [28, 29, 30, 35, 36, 46, 48, 49, 56, 60, 61, 62, 64, 65], "won": [28, 29, 30, 31, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "accept": [28, 29, 30], "216": [28, 38, 43, 51, 52], "encoder_param_filenam": [28, 29, 30], "encoder_bin_filenam": [28, 29, 30], "decoder_param_filenam": [28, 29, 30], "decoder_bin_filenam": [28, 29, 30], "joiner_param_filenam": [28, 29, 30], "joiner_bin_filenam": [28, 29, 30], "sound_filenam": [28, 29, 30], "328": 28, "336": 28, "106000": [28, 29, 30, 43, 47], "581": [28, 47], "381": 28, "7767517": [28, 29, 30], "1060": 28, "1342": 28, "in0": [28, 29, 30], "explan": [28, 29, 30], "magic": [28, 29, 30], "intermedi": [28, 29, 30], "1061": 28, "sherpametadata": [28, 29, 30], "sherpa_meta_data1": [28, 29, 30], "newli": [28, 29, 30], "must": [28, 29, 30, 61], "pair": [28, 29, 30], "sad": [28, 29, 30], "rememb": [28, 29, 30], "anymor": [28, 29, 30], "flexibl": [28, 29, 30, 35], "edit": [28, 29, 30], "arm": [28, 29, 30], "aarch64": [28, 29, 30], "onc": [28, 29], "mayb": [28, 29], "year": [28, 29], "_jit_trac": [28, 29], "fp32": [28, 29], "doubl": [28, 29], "py38": [28, 29, 30], "arg": [28, 29], "wave_filenam": [28, 29], "16k": [28, 29], "hz": [28, 29, 51, 52], "mono": [28, 29], "calibr": [28, 29], "cat": [28, 29], "eof": [28, 29], "calcul": [28, 29, 48, 61, 62], "has_gpu": [28, 29], "config": [28, 29], "use_vulkan_comput": [28, 29], "conv_87": 28, "942385": [28, 29], "threshold": [28, 29, 48], "938493": 28, "968131": 28, "conv_88": 28, "442448": 28, "549335": 28, "167552": 28, "conv_89": 28, "228289": 28, "001738": 28, "871552": 28, "linear_90": 28, "976146": 28, "101789": 28, "267128": 28, "linear_91": 28, "962030": 28, "162033": 28, "602713": 28, "linear_92": 28, "323041": 28, "853959": 28, "953129": 28, "linear_94": 28, "905416": 28, "648006": 28, "323545": 28, "linear_93": 28, "474093": 28, "200188": 28, "linear_95": 28, "888012": 28, "403563": 28, "483986": 28, "linear_96": 28, "856741": 28, "398679": 28, "524273": 28, "linear_97": 28, "635942": 28, "613655": 28, "590950": 28, "linear_98": 28, "460340": 28, "670146": 28, "398010": 28, "linear_99": 28, "532276": 28, "585537": 28, "119396": 28, "linear_101": 28, "585871": 28, "719224": 28, "205809": 28, "linear_100": 28, "751382": 28, "081648": 28, "linear_102": 28, "593344": 28, "450581": 28, "551147": 28, "linear_103": 28, "592681": 28, "705824": 28, "257959": 28, "linear_104": 28, "752957": 28, "980955": 28, "110489": 28, "linear_105": 28, "696240": 28, "877193": 28, "608953": 28, "linear_106": 28, "059659": 28, "643138": 28, "048950": 28, "linear_108": 28, "975461": 28, "589567": 28, "671457": 28, "linear_107": 28, "190381": 28, "515701": 28, "linear_109": 28, "710759": 28, "305635": 28, "082436": 28, "linear_110": 28, "531228": 28, "731162": 28, "159557": 28, "linear_111": 28, "528083": 28, "259322": 28, "211544": 28, "linear_112": 28, "148807": 28, "500842": 28, "087374": 28, "linear_113": 28, "592566": 28, "948851": 28, "166611": 28, "linear_115": 28, "437109": 28, "608947": 28, "642395": 28, "linear_114": 28, "193942": 28, "503904": 28, "linear_116": 28, "966980": 28, "200896": 28, "676392": 28, "linear_117": 28, "451303": 28, "061664": 28, "951344": 28, "linear_118": 28, "077262": 28, "965800": 28, "023804": 28, "linear_119": 28, "671615": 28, "847613": 28, "198460": 28, "linear_120": 28, "625638": 28, "131427": 28, "556595": 28, "linear_122": 28, "274080": 28, "888716": 28, "978189": 28, "linear_121": 28, "420480": 28, "429659": 28, "linear_123": 28, "826197": 28, "599617": 28, "281532": 28, "linear_124": 28, "396383": 28, "325849": 28, "335875": 28, "linear_125": 28, "337198": 28, "941410": 28, "221970": 28, "linear_126": 28, "699965": 28, "842878": 28, "224073": 28, "linear_127": 28, "775370": 28, "884215": 28, "696438": 28, "linear_129": 28, "872276": 28, "837319": 28, "254213": 28, "linear_128": 28, "180057": 28, "687883": 28, "linear_130": 28, "150427": 28, "454298": 28, "765789": 28, "linear_131": 28, "112692": 28, "924847": 28, "025545": 28, "linear_132": 28, "852893": 28, "116593": 28, "749626": 28, "linear_133": 28, "517084": 28, "024665": 28, "275314": 28, "linear_134": 28, "683807": 28, "878618": 28, "743618": 28, "linear_136": 28, "421055": 28, "322729": 28, "086264": 28, "linear_135": 28, "309880": 28, "917679": 28, "linear_137": 28, "827781": 28, "744595": 28, "915554": 28, "linear_138": 28, "422395": 28, "742882": 28, "402161": 28, "linear_139": 28, "527538": 28, "866123": 28, "849449": 28, "linear_140": 28, "128619": 28, "657793": 28, "266134": 28, "linear_141": 28, "839593": 28, "845993": 28, "021378": 28, "linear_143": 28, "442304": 28, "099039": 28, "889746": 28, "linear_142": 28, "325038": 28, "849592": 28, "linear_144": 28, "929444": 28, "618206": 28, "605080": 28, "linear_145": 28, "382126": 28, "321095": 28, "625010": 28, "linear_146": 28, "894987": 28, "867645": 28, "836517": 28, "linear_147": 28, "915313": 28, "906028": 28, "886522": 28, "linear_148": 28, "614287": 28, "908151": 28, "496181": 28, "linear_150": 28, "724932": 28, "485588": 28, "312899": 28, "linear_149": 28, "161146": 28, "606939": 28, "linear_151": 28, "164453": 28, "847355": 28, "719223": 28, "linear_152": 28, "086471": 28, "984121": 28, "222834": 28, "linear_153": 28, "099524": 28, "991601": 28, "816805": 28, "linear_154": 28, "054585": 28, "489706": 28, "286930": 28, "linear_155": 28, "389185": 28, "100321": 28, "963501": 28, "linear_157": 28, "982999": 28, "154796": 28, "637253": 28, "linear_156": 28, "537706": 28, "875190": 28, "linear_158": 28, "420287": 28, "502287": 28, "531588": 28, "linear_159": 28, "014746": 28, "423280": 28, "477261": 28, "linear_160": 28, "633553": 28, "715335": 28, "220921": 28, "linear_161": 28, "371849": 28, "117830": 28, "815203": 28, "linear_162": 28, "492933": 28, "126283": 28, "623318": 28, "linear_164": 28, "697504": 28, "825712": 28, "317358": 28, "linear_163": 28, "078367": 28, "008038": 28, "linear_165": 28, "023975": 28, "836278": 28, "577358": 28, "linear_166": 28, "860619": 28, "259792": 28, "493614": 28, "linear_167": 28, "380934": 28, "496160": 28, "107042": 28, "linear_168": 28, "691216": 28, "733317": 28, "831076": 28, "linear_169": 28, "723948": 28, "952728": 28, "129707": 28, "linear_171": 28, "034811": 28, "366547": 28, "665123": 28, "linear_170": 28, "356277": 28, "710501": 28, "linear_172": 28, "556884": 28, "729481": 28, "166058": 28, "linear_173": 28, "033039": 28, "207264": 28, "442120": 28, "linear_174": 28, "597379": 28, "658676": 28, "768131": 28, "linear_2": [28, 29], "293503": 28, "305265": 28, "877850": 28, "linear_1": [28, 29], "812222": 28, "766452": 28, "487047": 28, "linear_3": [28, 29], "999999": 28, "999755": 28, "031174": 28, "wish": [28, 29], "955k": 28, "18k": 28, "inparam": [28, 29], "inbin": [28, 29], "outparam": [28, 29], "outbin": [28, 29], "99m": 28, "78k": 28, "774k": [28, 29], "496": [28, 29, 43, 47], "replac": [28, 29], "774": [28, 29], "convolut": [28, 29, 48, 58, 61], "exact": [28, 29], "4x": [28, 29], "comparison": 28, "468000": [29, 33, 60], "lstm_transducer_stateless2": [29, 33, 60], "862": 29, "222": [29, 41, 43], "865": 29, "is_pnnx": 29, "62e404dd3f3a811d73e424199b3408e309c06e1a": [29, 30], "6d7a559": [29, 30], "feb": [29, 30, 40], "rnn_hidden_s": 29, "aux_layer_period": 29, "235": 29, "239": [29, 40], "472": 29, "324": 29, "83137520": 29, "596": 29, "325": 29, "257024": 29, "781812": 29, "327": 29, "84176356": 29, "182": [29, 30, 38, 47], "183": [29, 51, 52], "335": 29, "tracerwarn": [29, 30], "boolean": [29, 30], "caus": [29, 30, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "incorrect": [29, 30, 40], "flow": [29, 30], "constant": [29, 30], "futur": [29, 30, 40, 66], "need_pad": 29, "bool": 29, "259": [29, 38], "339": 29, "207": [29, 41, 43], "324m": 29, "321": [29, 38], "318m": 29, "159m": 29, "21k": 29, "861": 29, "266": [29, 30, 43, 47], "431": 29, "342": 29, "343": 29, "379": 29, "268": [29, 43, 47], "317m": 29, "317": 29, "conv_15": 29, "930708": 29, "972025": 29, "conv_16": 29, "978855": 29, "031788": 29, "456645": 29, "conv_17": 29, "868437": 29, "830528": 29, "218575": 29, "linear_18": 29, "107259": 29, "194808": 29, "293236": 29, "linear_19": 29, "193777": 29, "634748": 29, "401705": 29, "linear_20": 29, "259933": 29, "606617": 29, "722160": 29, "linear_21": 29, "186600": 29, "790260": 29, "512129": 29, "linear_22": 29, "759041": 29, "265832": 29, "050053": 29, "linear_23": 29, "931209": 29, "099090": 29, "979767": 29, "linear_24": 29, "324160": 29, "215561": 29, "321835": 29, "linear_25": 29, "800708": 29, "599352": 29, "284134": 29, "linear_26": 29, "492444": 29, "153369": 29, "274391": 29, "linear_27": 29, "660161": 29, "720994": 29, "674126": 29, "linear_28": 29, "415265": 29, "174434": 29, "007133": 29, "linear_29": 29, "038418": 29, "118534": 29, "724262": 29, "linear_30": 29, "072084": 29, "936867": 29, "259155": 29, "linear_31": 29, "342712": 29, "599489": 29, "282787": 29, "linear_32": 29, "340535": 29, "120308": 29, "701103": 29, "linear_33": 29, "846987": 29, "630030": 29, "985939": 29, "linear_34": 29, "686298": 29, "204571": 29, "607586": 29, "linear_35": 29, "904821": 29, "575518": 29, "756420": 29, "linear_36": 29, "806659": 29, "585589": 29, "118401": 29, "linear_37": 29, "402340": 29, "047157": 29, "162680": 29, "linear_38": 29, "174589": 29, "923361": 29, "030258": 29, "linear_39": 29, "178576": 29, "556058": 29, "807705": 29, "linear_40": 29, "901954": 29, "301267": 29, "956539": 29, "linear_41": 29, "839805": 29, "597429": 29, "716181": 29, "linear_42": 29, "178945": 29, "651595": 29, "895699": 29, "829245": 29, "627592": 29, "637907": 29, "746186": 29, "255032": 29, "167313": 29, "000000": 29, "999756": 29, "031013": 29, "345k": 29, "17k": 29, "218m": 29, "counterpart": 29, "bit": [29, 38, 40, 41, 43, 47, 54], "4532": 29, "feedforward": [30, 35, 40, 46, 61], "384": [30, 35, 43], "unmask": [30, 35], "downsampl": [30, 35, 39], "factor": [30, 35, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "473": [30, 43], "246": [30, 40, 43, 51, 52], "477": 30, "warm_step": 30, "2000": [30, 41], "feedforward_dim": 30, "attention_dim": [30, 38, 40, 43], "encoder_unmasked_dim": 30, "zipformer_downsampling_factor": 30, "decode_chunk_len": 30, "257": [30, 40, 51, 52], "023": 30, "zipformer2": 30, "419": 30, "At": [30, 38, 43], "stack": 30, "downsampling_factor": 30, "037": 30, "655": 30, "346": 30, "68944004": 30, "347": 30, "260096": 30, "348": [30, 51], "716276": 30, "656": [30, 43], "349": 30, "69920376": 30, "351": 30, "353": 30, "174": [30, 43], "175": 30, "1344": 30, "cached_len": 30, "num_lay": 30, "1348": 30, "cached_avg": 30, "1352": 30, "cached_kei": 30, "1356": 30, "cached_v": 30, "1360": 30, "cached_val2": 30, "1364": 30, "cached_conv1": 30, "1368": 30, "cached_conv2": 30, "1373": 30, "left_context_len": 30, "1884": 30, "x_size": 30, "2442": 30, "2449": 30, "2469": 30, "2473": 30, "2483": 30, "kv_len": 30, "2570": 30, "attn_output": 30, "bsz": 30, "num_head": 30, "seq_len": 30, "head_dim": 30, "2926": 30, "lorder": 30, "2652": 30, "2653": 30, "embed_dim": 30, "2666": 30, "1543": 30, "in_x_siz": 30, "1637": 30, "1643": 30, "in_channel": 30, "1571": 30, "1763": 30, "src1": 30, "src2": 30, "1779": 30, "dim1": 30, "1780": 30, "dim2": 30, "_trace": 30, "958": 30, "tracer": 30, "tupl": 30, "namedtupl": 30, "absolut": 30, "know": [30, 44], "side": 30, "allow": [30, 46, 61], "behavior": [30, 40], "_c": 30, "_create_method_from_trac": 30, "646": 30, "357": 30, "embedding_out": 30, "686": 30, "361": [30, 43, 47], "735": 30, "269m": 30, "269": [30, 38, 51, 52], "725": [30, 47], "1022k": 30, "266m": 30, "8m": 30, "509k": 30, "133m": 30, "152k": 30, "4m": 30, "1022": 30, "509": 30, "360": 30, "365": 30, "280": [30, 43], "372": [30, 38], "state": [30, 38, 40, 41, 43, 46, 48, 49, 56, 60, 61, 62], "410": 30, "411": [30, 43], "2028": 30, "2547": 30, "2029": 30, "23316": 30, "23317": 30, "23318": 30, "23319": 30, "23320": 30, "amount": [30, 37, 39], "pad": [30, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "conv2dsubsampl": 30, "arrai": 30, "23300": 30, "repo_url": 31, "basenam": 31, "why": 32, "streaming_asr": [32, 33, 60, 61, 62], "conv_emform": 32, "offline_asr": [32, 46], "baz": 33, "compact": 35, "inject": 35, "competit": 35, "full": [35, 36, 43, 44, 46, 48, 49, 60, 61, 62], "subset": [35, 36, 43, 46, 48, 49, 60, 61, 62], "instruct": [35, 36], "intial": [35, 36], "decode_gigaspeech": [35, 36], "1000": [35, 36, 43, 64, 65], "insert": 35, "residu": 35, "zipformer2encoderlay": 35, "remain": 35, "untouch": 35, "experi": [35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "do_finetun": [35, 36], "use_adapt": 35, "adapter_dim": 35, "zipformer_adapt": 35, "world": [35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 56, 60, 61, 62, 64, 65], "exp_giga_finetune_adapt": 35, "_adapter_dim": 35, "045": 35, "13022": 35, "ckpt": [35, 36], "certain": [35, 36, 37], "bottleneck": 35, "notic": 35, "trainal": 35, "2024": [35, 64], "808": [35, 43, 51], "1277": 35, "761344": 35, "trainabl": 35, "entir": 35, "deactiv": 35, "keep": [35, 40, 46, 61, 62], "768": 35, "1536": 35, "queri": 35, "po": 35, "causal": [35, 61], "previou": [36, 56], "stateless": [36, 39, 42, 46, 60, 61, 62], "due": [36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "vocabulari": [36, 40], "use_mux": 36, "exp_giga_finetun": 36, "_mux": 36, "0045": 36, "mux": 36, "13024": 36, "forget": 36, "quickli": 36, "mix": 36, "maintain": 36, "ones": 36, "lower": [36, 60], "public": 37, "capabl": 37, "high": [37, 39, 64], "label": 37, "1best": [38, 41, 43, 47, 48, 49, 51, 52], "automag": [38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "stop": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "By": [38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "musan": [38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "apt": [38, 41], "permiss": [38, 41], "commandlin": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "multi": [38, 40, 41, 43, 44, 46, 48, 49, 58, 60, 61, 62], "machin": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "ddp": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "implement": [38, 40, 41, 43, 44, 46, 48, 49, 58, 60, 61, 62], "utter": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "oom": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "decai": [38, 41, 43, 48, 49, 60], "warmup": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "function": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "get_param": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "directli": [38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "perturb": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "3x150": [38, 40, 41], "450": [38, 40, 41], "visual": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "logdir": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "labelsmooth": 38, "tensorflow": [38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "press": [38, 40, 41, 43, 46, 48, 49, 54, 60, 61, 62], "ctrl": [38, 40, 41, 43, 46, 48, 49, 54, 60, 61, 62], "engw8ksktzqs24zbv5dgcg": 38, "2021": [38, 41, 43, 47, 51, 52, 54], "22t11": 38, "scan": [38, 40, 41, 43, 46, 54, 60, 61], "116068": 38, "scalar": [38, 40, 41, 43, 46, 54, 60, 61], "listen": [38, 40, 41, 46, 54, 60, 61], "xxxx": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "saw": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "consol": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "avoid": [38, 40, 43], "nbest": [38, 43, 49], "lattic": [38, 41, 43, 46, 47, 51, 52, 61, 62], "uniqu": [38, 43, 46, 61, 62], "pkufool": [38, 41, 47], "icefall_asr_aishell_conformer_ctc": 38, "transcrib": [38, 40, 41, 43], "lang_char": [38, 40], "bac009s0764w0121": [38, 40, 41], "bac009s0764w0122": [38, 40, 41], "bac009s0764w0123": [38, 40, 41], "tran": [38, 41, 43, 47, 51, 52], "conveni": [38, 41, 43, 44], "eo": [38, 41, 43], "soxi": [38, 40, 41, 43, 47, 54], "sampl": [38, 40, 41, 43, 47, 48, 54, 61, 62], "precis": [38, 40, 41, 43, 46, 47, 54, 61, 62], "67263": [38, 40, 41], "cdda": [38, 40, 41, 43, 47, 54], "sector": [38, 40, 41, 43, 47, 54], "135k": [38, 40, 41], "256k": [38, 40, 41, 43], "sign": [38, 40, 41, 43, 54], "integ": [38, 40, 41, 43, 54], "pcm": [38, 40, 41, 43, 54], "65840": [38, 40, 41], "308": [38, 40, 41], "625": [38, 40, 41], "132k": [38, 40, 41], "64000": [38, 40, 41], "300": [38, 40, 41, 43, 44, 46, 56, 61], "128k": [38, 40, 41, 54], "topologi": [38, 43], "num_decoder_lay": [38, 43], "vgg_frontend": [38, 40, 43], "use_feat_batchnorm": [38, 43], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 38, "sun": 38, "sep": 38, "33cfe45": 38, "d57a873": 38, "nov": [38, 43], "hw": 38, "kangwei": 38, "icefall_aishell3": 38, "k2_releas": 38, "tokens_fil": 38, "num_path": [38, 43, 46, 61, 62], "ngram_lm_scal": [38, 43], "attention_decoder_scal": [38, 43], "nbest_scal": [38, 43], "sos_id": [38, 43], "eos_id": [38, 43], "4336": [38, 40], "293": [38, 43], "369": [38, 43], "\u751a": [38, 40], "\u81f3": [38, 40], "\u51fa": [38, 40], "\u73b0": [38, 40], "\u4ea4": [38, 40], "\u6613": [38, 40], "\u51e0": [38, 40], "\u4e4e": [38, 40], "\u505c": [38, 40], "\u6b62": 38, "\u7684": [38, 40, 41], "\u60c5": [38, 40], "\u51b5": [38, 40], "\u4e00": [38, 40], "\u4e8c": [38, 40], "\u7ebf": [38, 40, 41], "\u57ce": [38, 40], "\u5e02": [38, 40], "\u867d": [38, 40], "\u7136": [38, 40], "\u4e5f": [38, 40, 41], "\u5904": [38, 40], "\u4e8e": [38, 40], "\u8c03": [38, 40], "\u6574": [38, 40], "\u4e2d": [38, 40, 41], "\u4f46": [38, 40, 41], "\u56e0": [38, 40], "\u4e3a": [38, 40], "\u805a": [38, 40], "\u96c6": [38, 40], "\u4e86": [38, 40, 41], "\u8fc7": [38, 40], "\u591a": [38, 40], "\u516c": [38, 40], "\u5171": [38, 40], "\u8d44": [38, 40], "\u6e90": [38, 40], "371": 38, "683": 38, "684": [38, 54], "651": [38, 54], "654": 38, "659": 38, "752": 38, "887": 38, "340": 38, "370": 38, "\u751a\u81f3": [38, 41], "\u51fa\u73b0": [38, 41], "\u4ea4\u6613": [38, 41], "\u51e0\u4e4e": [38, 41], "\u505c\u6b62": 38, "\u60c5\u51b5": [38, 41], "\u4e00\u4e8c": [38, 41], "\u57ce\u5e02": [38, 41], "\u867d\u7136": [38, 41], "\u5904\u4e8e": [38, 41], "\u8c03\u6574": [38, 41], "\u56e0\u4e3a": [38, 41], "\u805a\u96c6": [38, 41], "\u8fc7\u591a": [38, 41], "\u516c\u5171": [38, 41], "\u8d44\u6e90": [38, 41], "recor": [38, 43], "highest": [38, 43], "966": 38, "821": 38, "822": 38, "826": 38, "916": 38, "345": 38, "889": 38, "limit": [38, 40, 43, 58, 61], "upgrad": [38, 43], "checkout": [38, 43], "hlg_decod": [38, 43], "four": [38, 43], "messag": [38, 43, 46, 48, 49, 60, 61, 62], "use_gpu": [38, 43], "word_tabl": [38, 43], "forward": [38, 43, 48], "cu": [38, 43], "char": [38, 43], "693": [38, 51], "nnet_output": [38, 43], "185": [38, 43, 54], "217": [38, 43], "mandarin": 39, "beij": 39, "shell": 39, "technologi": 39, "ltd": 39, "peopl": 39, "accent": 39, "area": 39, "invit": 39, "particip": 39, "conduct": 39, "indoor": 39, "fidel": 39, "microphon": 39, "16khz": 39, "manual": 39, "profession": 39, "annot": 39, "inspect": 39, "free": [39, 44, 56, 60], "academ": 39, "moder": 39, "research": 39, "openslr": [39, 56], "conv1d": [40, 46, 60, 61, 62], "tanh": 40, "borrow": 40, "ieeexplor": 40, "ieee": 40, "jsp": 40, "arnumb": 40, "9054419": 40, "predict": [40, 44, 46, 60, 61, 62], "87939824": 40, "optimized_transduc": 40, "technqiu": 40, "maximum": 40, "emit": 40, "simplifi": [40, 58], "significantli": 40, "degrad": 40, "exactli": 40, "unprun": 40, "advantag": 40, "minim": 40, "pruned_transducer_stateless": [40, 46, 58, 61], "altern": 40, "though": 40, "transducer_stateless_modifi": 40, "pr": 40, "ram": 40, "tri": 40, "prob": [40, 60], "219": [40, 43], "lagz6hrcqxoigbfd5e0y3q": 40, "03t14": 40, "8477": 40, "250": [40, 47], "sym": [40, 46, 61, 62], "beam_search": [40, 46, 61, 62], "decoding_method": 40, "beam_4": 40, "ensur": 40, "poor": 40, "531": [40, 41], "994": [40, 43], "027": 40, "encoder_out_dim": 40, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 40, "50d2281": 40, "mar": 40, "0815224919": 40, "75d558775b": 40, "mmnv8": 40, "878": [40, 52], "880": 40, "891": 40, "userwarn": 40, "__floordiv__": 40, "round": 40, "toward": 40, "trunc": 40, "floor": 40, "div": 40, "rounding_mod": 40, "divis": 40, "x_len": 40, "\u6ede": 40, "322": 40, "759": 40, "760": 40, "919": 40, "922": 40, "929": 40, "046": 40, "319": [40, 43], "798": 40, "831": [40, 52], "215": [40, 43, 47], "402": 40, "topk_hyp_index": 40, "topk_index": 40, "logit": 40, "583": [40, 52], "lji9mwuorlow3jkdhxwk8a": 41, "13t11": 41, "4454": 41, "icefall_asr_aishell_tdnn_lstm_ctc": 41, "858": [41, 43], "389": [41, 43], "536": 41, "539": 41, "917": 41, "\u505c\u6ede": 41, "mmi": [42, 45], "blank": [42, 45], "skip": [42, 44, 45, 46, 60, 61, 62], "distil": [42, 45], "hubert": [42, 45], "ligru": [42, 50], "libri": [43, 44, 46, 48, 49, 60, 61, 62], "3x960": [43, 46, 48, 49, 60, 61, 62], "2880": [43, 46, 48, 49, 60, 61, 62], "lzgnetjwrxc3yghnmd4kpw": 43, "24t16": 43, "4540": 43, "sentenc": [43, 56], "piec": 43, "And": [43, 46, 48, 49, 60, 61, 62], "neither": 43, "nor": 43, "5000": 43, "033": 43, "537": 43, "full_libri": [43, 44], "464": 43, "548": 43, "776": 43, "652": [43, 54], "109226120": 43, "714": [43, 51], "206": 43, "944": 43, "1328": 43, "443": [43, 47], "2563": 43, "494": 43, "592": 43, "1715": 43, "52576": 43, "1424": 43, "807": 43, "506": 43, "362": 43, "1477": 43, "2922": 43, "4295": 43, "52343": 43, "396": 43, "3584": 43, "432": 43, "680": [43, 51], "_pickl": 43, "unpicklingerror": 43, "invalid": 43, "hlg_modifi": 43, "g_4_gram": [43, 47, 51, 52], "sentencepiec": 43, "875": [43, 47], "212k": 43, "267440": [43, 47], "1253": [43, 47], "535k": 43, "77200": [43, 47], "154k": 43, "554": 43, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 43, "8d93169": 43, "601": 43, "025": 43, "broffel": 43, "osom": 43, "723": 43, "775": 43, "881": 43, "571": 43, "857": 43, "979": 43, "055": 43, "051": 43, "363": 43, "959": [43, 52], "546": 43, "598": 43, "599": [43, 47], "833": 43, "834": 43, "915": 43, "076": 43, "397": 43, "999": [43, 46, 61, 62], "concaten": 43, "bucket": 43, "sampler": 43, "ctc_decod": 43, "ngram_lm_rescor": 43, "attention_rescor": 43, "228": 43, "543": 43, "topo": 43, "547": 43, "729": 43, "703": 43, "545": 43, "945": 43, "475": 43, "191": [43, 51, 52], "398": 43, "515": 43, "deseri": 43, "441": 43, "fsaclass": 43, "loadfsa": 43, "const": 43, "string": 43, "c10": 43, "ignor": 43, "589": 43, "attention_scal": 43, "188": 43, "984": 43, "624": 43, "519": [43, 52], "632": 43, "645": [43, 54], "243": 43, "303": 43, "179": 43, "knowledg": 44, "vector": 44, "mvq": 44, "kd": 44, "pruned_transducer_stateless4": [44, 46, 58, 61], "theoret": 44, "applic": 44, "minor": 44, "stop_stag": [44, 64, 65], "thing": 44, "distillation_with_hubert": 44, "Of": 44, "cours": 44, "xl": 44, "proce": 44, "960h": [44, 48], "use_extracted_codebook": 44, "augment": 44, "th": [44, 51, 52], "embedding_lay": 44, "num_codebook": 44, "under": [44, 56], "vq_fbank_layer36_cb8": 44, "whola": 44, "snippet": 44, "echo": 44, "awk": 44, "pruned_transducer_stateless6": 44, "12359": 44, "spec": 44, "warp": 44, "paid": 44, "suitabl": [46, 60, 61, 62], "pruned_transducer_stateless2": [46, 58, 61], "pruned_transducer_stateless5": [46, 58, 61], "scroll": [46, 48, 49, 60, 61, 62], "arxiv": [46, 60, 61, 62], "2206": [46, 60, 61, 62], "13236": [46, 60, 61, 62], "rework": [46, 58, 61], "daniel": [46, 61, 62], "joint": [46, 60, 61, 62], "contrari": [46, 60, 61, 62], "convent": [46, 60, 61, 62], "recurr": [46, 60, 61, 62], "2x": [46, 61, 62], "littl": [46, 61], "436000": [46, 48, 49, 60, 61, 62], "438000": [46, 48, 49, 60, 61, 62], "qogspbgsr8kzcrmmie9jgw": 46, "20t15": [46, 60, 61], "4468": [46, 60, 61], "210171": [46, 60, 61], "access": [46, 48, 49, 60, 61, 62], "googl": [46, 48, 49, 60, 61, 62], "6008": [46, 48, 49, 60, 61, 62], "localhost": [46, 48, 49, 60, 61, 62], "expos": [46, 48, 49, 60, 61, 62], "proxi": [46, 48, 49, 60, 61, 62], "bind_al": [46, 48, 49, 60, 61, 62], "fast_beam_search": [46, 48, 60, 61, 62], "474000": [46, 60, 61, 62], "largest": [46, 61, 62], "posterior": [46, 48, 61, 62], "algorithm": [46, 61, 62], "pdf": [46, 49, 61, 62], "1211": [46, 61, 62], "3711": [46, 61, 62], "espnet": [46, 61, 62], "net": [46, 61, 62], "beam_search_transduc": [46, 61, 62], "basic": [46, 61], "topk": [46, 61, 62], "expand": [46, 61, 62], "mode": [46, 61, 62], "being": [46, 61, 62], "hardcod": [46, 61, 62], "composit": [46, 61, 62], "hard": [46, 58, 61, 62], "2211": [46, 61, 62], "00484": [46, 61, 62], "fast_beam_search_lg": [46, 61, 62], "trivial": [46, 61, 62], "fast_beam_search_nbest": [46, 61, 62], "random_path": [46, 61, 62], "shortest": [46, 61, 62], "fast_beam_search_nbest_lg": [46, 61, 62], "logic": [46, 61, 62], "smallest": [46, 60, 61, 62], "normal": [47, 51, 52, 54, 61], "icefall_asr_librispeech_tdnn": 47, "lstm_ctc": 47, "flac": 47, "116k": 47, "140k": 47, "343k": 47, "164k": 47, "105k": 47, "174k": 47, "pretraind": 47, "584": [47, 52], "791": 47, "245": 47, "098": 47, "099": 47, "methond": [47, 51, 52], "631": 47, "010": 47, "guidanc": 48, "bigger": 48, "simpli": 48, "discard": 48, "prevent": 48, "lconv": 48, "encourag": [48, 49, 60], "stabil": [48, 49], "doesn": 48, "warm": [48, 49], "xyozukpeqm62hbilud4upa": [48, 49], "ctc_guide_decode_b": 48, "pretrained_ctc": 48, "jit_pretrained_ctc": 48, "100h": 48, "yfyeung": 48, "wechat": 49, "zipformer_mmi": 49, "worker": [49, 60], "hp": 49, "tdnn_ligru_ctc": 51, "enough": [51, 52, 54, 56], "luomingshuang": [51, 52], "icefall_asr_timit_tdnn_ligru_ctc": 51, "pretrained_average_9_25": 51, "fdhc0_si1559": [51, 52], "felc0_si756": [51, 52], "fmgd0_si1564": [51, 52], "ffprobe": [51, 52], "show_format": [51, 52], "nistspher": [51, 52], "database_id": [51, 52], "database_vers": [51, 52], "utterance_id": [51, 52], "dhc0_si1559": [51, 52], "sample_min": [51, 52], "4176": [51, 52], "sample_max": [51, 52], "5984": [51, 52], "bitrat": [51, 52], "pcm_s16le": [51, 52], "s16": [51, 52], "elc0_si756": [51, 52], "1546": [51, 52], "1989": [51, 52], "mgd0_si1564": [51, 52], "7626": [51, 52], "10573": [51, 52], "660": 51, "695": 51, "697": 51, "819": 51, "829": 51, "sil": [51, 52], "dh": [51, 52], "ih": [51, 52], "uw": [51, 52], "ah": [51, 52], "ii": [51, 52], "aa": [51, 52], "ei": [51, 52], "dx": [51, 52], "uh": [51, 52], "ng": [51, 52, 64], "eh": [51, 52], "jh": [51, 52], "er": [51, 52], "ai": [51, 52], "hh": [51, 52], "aw": 51, "ae": [51, 52], "705": 51, "715": 51, "720": 51, "251": [51, 52], "ch": 51, "icefall_asr_timit_tdnn_lstm_ctc": 52, "pretrained_average_16_25": 52, "816": 52, "827": 52, "unk": 52, "739": 52, "977": 52, "978": 52, "981": 52, "ow": 52, "ykubhb5wrmosxykid1z9eg": 54, "23t23": 54, "icefall_asr_yesno_tdnn": 54, "0_0_1_0_0_1_1_1": 54, "0_0_1_0_1_0_0_1": 54, "0_0_1_1_0_0_0_1": 54, "0_0_1_1_0_1_1_0": 54, "0_0_1_1_1_0_0_0": 54, "0_0_1_1_1_1_0_0": 54, "0_1_0_0_0_1_0_0": 54, "0_1_0_0_1_0_1_0": 54, "0_1_0_1_0_0_0_0": 54, "0_1_0_1_1_1_0_0": 54, "0_1_1_0_0_1_1_1": 54, "0_1_1_1_0_0_1_0": 54, "0_1_1_1_1_0_1_0": 54, "1_0_0_0_0_0_0_0": 54, "1_0_0_0_0_0_1_1": 54, "1_0_0_1_0_1_1_1": 54, "1_0_1_1_0_1_1_1": 54, "1_0_1_1_1_1_0_1": 54, "1_1_0_0_0_1_1_1": 54, "1_1_0_0_1_0_1_1": 54, "1_1_0_1_0_1_0_0": 54, "1_1_0_1_1_0_0_1": 54, "1_1_0_1_1_1_1_0": 54, "1_1_1_0_0_1_0_1": 54, "1_1_1_0_1_0_1_0": 54, "1_1_1_1_0_0_1_0": 54, "1_1_1_1_1_0_0_0": 54, "1_1_1_1_1_1_1_1": 54, "54080": 54, "507": 54, "108k": 54, "650": 54, "198": 54, "181": 54, "186": 54, "187": 54, "correctli": 54, "simplest": 54, "nnlm": 56, "complet": 56, "wget": [56, 64], "resourc": 56, "norm": 56, "gzip": 56, "prepare_lm_training_data": 56, "lm_data": 56, "grab": 56, "cup": 56, "coffe": 56, "sort_lm_training_data": 56, "sorted_lm_data": 56, "statist": 56, "lm_data_stat": 56, "aforement": 56, "repeat": 56, "rnn_lm": 56, "tie": 56, "hyper": [56, 64, 65], "coupl": [56, 64, 65], "dai": [56, 64, 65], "former": 58, "mask": [58, 61, 62], "wenet": 58, "did": 58, "request": 58, "complic": 58, "techniqu": 58, "bank": 58, "memor": 58, "histori": 58, "introduc": 58, "variant": 58, "pruned_stateless_emformer_rnnt2": 58, "conv_emformer_transducer_stateless": 58, "ourself": 58, "mechan": 58, "onlin": 60, "lstm_transducer_stateless": 60, "prepare_giga_speech": 60, "cj2vtpiwqhkn9q1tx6ptpg": 60, "dynam": [61, 62], "short": [61, 62], "2012": 61, "05481": 61, "flag": 61, "indic": [61, 62], "whether": 61, "uniformli": [61, 62], "seen": [61, 62], "97vkxf80ru61cnp2alwzzg": 61, "streaming_decod": [61, 62], "wise": [61, 62], "parallel": [61, 62], "bath": [61, 62], "parallelli": [61, 62], "seem": 61, "benefit": 61, "320m": 62, "550": 62, "basicli": 62, "scriptmodul": 62, "jit_trace_export": 62, "jit_trace_pretrain": 62, "monoton": 63, "condit": [64, 65], "variat": [64, 65], "autoencod": [64, 65], "adversari": [64, 65], "piper_phonem": 64, "numba": 64, "espnet_tts_frontend": 64, "monotonic_align": [64, 65], "build_ext": [64, 65], "inplac": [64, 65], "medium": 64, "ground": [64, 65], "truth": [64, 65], "test_onnx": [64, 65], "program": 64, "kotlin": 64, "java": 64, "swift": 64, "offlin": 64, "espeak": 64, "bz2": 64, "xf": 64, "thread": 64, "countri": 64, "plai": 64, "350": 65, "zrjin": 65, "synthesi": 66, "task": 66}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": [0, 9], "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 26, 32, 33], "creat": [2, 13, 20, 25], "recip": [2, 66], "data": [2, 9, 11, 20, 25, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "prepar": [2, 9, 11, 20, 25, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "train": [2, 9, 16, 22, 25, 28, 29, 30, 31, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "decod": [2, 5, 6, 7, 9, 12, 25, 26, 31, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "pre": [2, 22, 28, 29, 30, 31, 35, 36, 37, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "model": [2, 5, 15, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "lodr": [4, 6], "rnn": [4, 55, 56], "transduc": [4, 6, 7, 28, 29, 30, 40, 46, 60, 61, 62], "wer": [4, 6, 7, 43], "differ": [4, 6, 7, 17], "beam": [4, 6, 7, 40], "size": [4, 6, 7], "languag": [5, 56], "lm": [6, 43, 55], "rescor": [6, 38, 43], "base": [6, 18, 19, 20], "method": 6, "v": 6, "shallow": [6, 7], "fusion": [6, 7], "The": [6, 40], "number": 6, "each": [6, 20], "field": 6, "i": 6, "test": [6, 7, 20, 25, 28, 29, 30], "clean": [6, 7], "other": 6, "time": [6, 7], "docker": [8, 9], "introduct": [9, 58], "view": 9, "avail": 9, "tag": 9, "cuda": [9, 25], "enabl": 9, "imag": 9, "cpu": 9, "onli": 9, "download": [9, 11, 25, 28, 29, 30, 31, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "run": [9, 26, 64], "gpu": 9, "yesno": [9, 53], "within": 9, "contain": 9, "updat": 9, "frequent": 10, "ask": 10, "question": 10, "faq": 10, "oserror": 10, "libtorch_hip": 10, "so": 10, "cannot": 10, "open": 10, "share": 10, "object": 10, "file": [10, 11, 20, 31, 64], "directori": 10, "attributeerror": 10, "modul": 10, "distutil": 10, "ha": 10, "attribut": 10, "version": 10, "importerror": 10, "libpython3": 10, "10": 10, "1": [10, 25, 28, 29, 30, 38, 40, 41, 43], "0": [10, 25], "No": 10, "For": [11, 12, 13, 15, 16], "more": [11, 12, 13, 15, 16], "curiou": [11, 12, 13, 15, 16], "A": 11, "quick": 11, "look": 11, "gener": [11, 20], "environ": [13, 20, 25], "setup": 13, "virtual": [13, 25], "instal": [13, 25, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 64], "depend": [13, 64], "icefal": [13, 14, 24, 25, 28, 29, 30], "dummi": 14, "tutori": 14, "export": [15, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 46, 48, 49, 60, 61, 62, 64, 65], "paramet": 15, "via": [15, 28, 29, 30], "state_dict": [15, 26, 46, 48, 49, 60, 61, 62], "torch": [15, 25, 28, 29, 30, 32, 33, 46, 48, 49, 60, 61, 62], "jit": [15, 28, 29, 30, 32, 33, 46, 48, 49, 60, 61, 62], "script": [15, 32, 46, 48, 49, 61, 62], "onnx": [15, 31, 64], "two": 17, "approach": 17, "between": 17, "fst": [18, 20], "forc": [18, 19, 20], "align": [18, 19, 20, 64, 65], "content": [18, 24, 37, 66], "k2": [19, 25], "kaldi": 20, "get": 20, "comput": [20, 43], "log_prob": 20, "token2id": 20, "id2token": 20, "word2id": 20, "id2word": 20, "lexicon": [20, 64], "relat": 20, "convert": 20, "transcript": 20, "an": [20, 56], "graph": 20, "segment": 20, "word": 20, "us": [20, 26, 32, 33, 46, 48, 49, 60, 61, 62], "summari": 20, "huggingfac": [21, 23], "space": 23, "youtub": [23, 25], "video": [23, 25], "toolkit": 25, "cudnn": 25, "torchaudio": 25, "2": [25, 28, 29, 30, 38, 40, 41, 43], "3": [25, 28, 29, 30, 38, 40, 43], "lhots": 25, "4": [25, 28, 29, 30], "exampl": [25, 31, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "5": [25, 28, 29, 30], "6": [25, 28, 29, 30], "your": 25, "when": [26, 32, 33], "py": 26, "ncnn": [27, 28, 29, 30], "convemform": 28, "pnnx": [28, 29, 30], "trace": [28, 29, 30, 33, 60, 62], "torchscript": [28, 29, 30], "modifi": [28, 29, 30, 40], "encod": [28, 29, 30], "sherpa": [28, 29, 30, 31, 46, 61, 62, 64], "7": [28, 29], "option": [28, 29, 38, 41, 43, 46, 48, 49, 60, 61, 62], "int8": [28, 29], "quantiz": [28, 29], "lstm": [29, 41, 47, 52, 60], "stream": [30, 42, 57, 58, 61, 62], "zipform": [30, 35, 36, 48, 49, 62], "sound": 31, "finetun": [35, 36], "from": [35, 36], "adapt": 35, "fine": [35, 36, 37], "tune": [35, 36, 37], "supervis": 36, "tabl": [37, 66], "conform": [38, 43, 58], "ctc": [38, 41, 43, 47, 48, 51, 52, 54], "configur": [38, 41, 43, 46, 48, 49, 60, 61, 62], "log": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "usag": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62, 64], "case": [38, 40, 41, 43], "kaldifeat": [38, 40, 41, 43, 47, 51, 52, 54], "hlg": [38, 41, 43], "attent": [38, 43], "colab": [38, 40, 41, 43, 47, 51, 52, 54], "notebook": [38, 40, 41, 43, 47, 51, 52, 54], "deploy": [38, 43], "c": [38, 43], "aishel": 39, "stateless": 40, "loss": 40, "todo": 40, "greedi": 40, "search": [40, 64, 65], "tdnn": [41, 47, 51, 52, 54], "non": 42, "asr": [42, 57], "n": 43, "gram": 43, "distil": 44, "hubert": 44, "codebook": 44, "index": 44, "librispeech": [45, 59], "prune": [46, 61], "statelessx": [46, 61], "pretrain": [46, 48, 49, 60, 61, 62, 64, 65], "deploi": [46, 61, 62], "infer": [47, 51, 52, 54, 64, 65], "blank": 48, "skip": 48, "mmi": 49, "timit": 50, "ligru": 51, "emform": 58, "which": 60, "simul": [61, 62], "real": [61, 62], "tt": 63, "vit": [64, 65], "ljspeech": 64, "extra": 64, "build": [64, 65], "monoton": [64, 65], "vctk": 65}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 58}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [11, "data-preparation"], [40, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [16, "training"], [25, "training"], [38, "training"], [40, "training"], [41, "training"], [43, "training"], [44, "training"], [46, "training"], [47, "training"], [48, "training"], [49, "training"], [51, "training"], [52, "training"], [54, "training"], [60, "training"], [61, "training"], [62, "training"], [64, "training"], [65, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [12, "decoding"], [25, "decoding"], [35, "decoding"], [38, "decoding"], [40, "decoding"], [41, "decoding"], [43, "decoding"], [44, "decoding"], [46, "decoding"], [47, "decoding"], [48, "decoding"], [49, "decoding"], [51, "decoding"], [52, "decoding"], [54, "decoding"], [60, "decoding"], [61, "decoding"], [62, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "LODR for RNN Transducer": [[4, "lodr-for-rnn-transducer"]], "WER of LODR with different beam sizes": [[4, "id1"]], "Decoding with language models": [[5, "decoding-with-language-models"]], "LM rescoring for Transducer": [[6, "lm-rescoring-for-transducer"]], "WERs of LM rescoring with different beam sizes": [[6, "id1"]], "WERs of LM rescoring + LODR with different beam sizes": [[6, "id2"]], "LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean)": [[6, "id3"]], "Shallow fusion for Transducer": [[7, "shallow-fusion-for-transducer"]], "WERs and decoding time (on test-clean) of shallow fusion with different beam sizes": [[7, "id2"]], "Docker": [[8, "docker"]], "Introduction": [[9, "introduction"], [58, "introduction"]], "View available tags": [[9, "view-available-tags"]], "CUDA-enabled docker images": [[9, "cuda-enabled-docker-images"]], "CPU-only docker images": [[9, "cpu-only-docker-images"]], "Download a docker image (CUDA)": [[9, "download-a-docker-image-cuda"]], "Download a docker image (CPU)": [[9, "download-a-docker-image-cpu"]], "Run a docker image with GPU": [[9, "run-a-docker-image-with-gpu"]], "Run a docker image with CPU": [[9, "run-a-docker-image-with-cpu"]], "Run yesno within a docker container": [[9, "run-yesno-within-a-docker-container"]], "Update the code": [[9, "update-the-code"]], "Data preparation": [[9, "data-preparation"], [25, "data-preparation"], [35, "data-preparation"], [36, "data-preparation"], [38, "data-preparation"], [41, "data-preparation"], [43, "data-preparation"], [44, "data-preparation"], [46, "data-preparation"], [47, "data-preparation"], [48, "data-preparation"], [49, "data-preparation"], [51, "data-preparation"], [52, "data-preparation"], [54, "data-preparation"], [60, "data-preparation"], [61, "data-preparation"], [62, "data-preparation"], [64, "data-preparation"], [65, "data-preparation"]], "Frequently Asked Questions (FAQs)": [[10, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[10, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[10, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[10, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "For the more curious": [[11, "for-the-more-curious"], [12, "for-the-more-curious"], [13, "for-the-more-curious"], [15, "for-the-more-curious"], [16, "for-the-more-curious"]], "A quick look to the generated files": [[11, "a-quick-look-to-the-generated-files"]], "download": [[11, "download"]], "data": [[11, "data"]], "Environment setup": [[13, "environment-setup"]], "Create a virtual environment": [[13, "create-a-virtual-environment"]], "Install dependencies": [[13, "install-dependencies"]], "Install icefall": [[13, "install-icefall"]], "Icefall for dummies tutorial": [[14, "icefall-for-dummies-tutorial"]], "Model Export": [[15, "model-export"]], "Export the model parameters via model.state_dict()": [[15, "export-the-model-parameters-via-model-state-dict"]], "Export via torch.jit.script()": [[15, "export-via-torch-jit-script"]], "Export via torch.onnx.export()": [[15, "export-via-torch-onnx-export"]], "Two approaches": [[17, "two-approaches"]], "Differences between the two approaches": [[17, "differences-between-the-two-approaches"]], "FST-based forced alignment": [[18, "fst-based-forced-alignment"]], "Contents:": [[18, null], [24, null]], "k2-based forced alignment": [[19, "k2-based-forced-alignment"]], "Kaldi-based forced alignment": [[20, "kaldi-based-forced-alignment"]], "Prepare the environment": [[20, "prepare-the-environment"]], "Get the test data": [[20, "get-the-test-data"]], "Compute log_probs": [[20, "compute-log-probs"]], "Create token2id and id2token": [[20, "create-token2id-and-id2token"]], "Create word2id and id2word": [[20, "create-word2id-and-id2word"]], "Generate lexicon-related files": [[20, "generate-lexicon-related-files"]], "Convert transcript to an FST graph": [[20, "convert-transcript-to-an-fst-graph"]], "Force aligner": [[20, "force-aligner"]], "Segment each word using the computed alignments": [[20, "segment-each-word-using-the-computed-alignments"]], "Summary": [[20, "summary"]], "Huggingface": [[21, "huggingface"]], "Pre-trained models": [[22, "pre-trained-models"]], "Huggingface spaces": [[23, "huggingface-spaces"]], "YouTube Video": [[23, "youtube-video"], [25, "youtube-video"]], "Icefall": [[24, "icefall"]], "Installation": [[25, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[25, "install-cuda-toolkit-and-cudnn"]], "(1) Install torch and torchaudio": [[25, "install-torch-and-torchaudio"]], "(2) Install k2": [[25, "install-k2"]], "(3) Install lhotse": [[25, "install-lhotse"]], "(4) Download icefall": [[25, "download-icefall"]], "Installation example": [[25, "installation-example"]], "(1) Create a virtual environment": [[25, "create-a-virtual-environment"]], "(2) Install CUDA toolkit and cuDNN": [[25, "id1"]], "(3) Install torch and torchaudio": [[25, "id2"]], "(4) Install k2": [[25, "id3"]], "(5) Install lhotse": [[25, "id5"]], "(6) Download icefall": [[25, "id6"]], "Test Your Installation": [[25, "test-your-installation"]], "Export model.state_dict()": [[26, "export-model-state-dict"], [46, "export-model-state-dict"], [48, "export-model-state-dict"], [49, "export-model-state-dict"], [60, "export-model-state-dict"], [61, "export-model-state-dict"], [62, "export-model-state-dict"]], "When to use it": [[26, "when-to-use-it"], [32, "when-to-use-it"], [33, "when-to-use-it"]], "How to export": [[26, "how-to-export"], [32, "how-to-export"], [33, "how-to-export"]], "How to use the exported model": [[26, "how-to-use-the-exported-model"], [32, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[26, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[27, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[28, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[28, "download-the-pre-trained-model"], [29, "download-the-pre-trained-model"], [30, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[28, "install-ncnn-and-pnnx"], [29, "install-ncnn-and-pnnx"], [30, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[28, "export-the-model-via-torch-jit-trace"], [29, "export-the-model-via-torch-jit-trace"], [30, "export-the-model-via-torch-jit-trace"]], "4. Export torchscript model via pnnx": [[28, "export-torchscript-model-via-pnnx"], [29, "export-torchscript-model-via-pnnx"], [30, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[28, "test-the-exported-models-in-icefall"], [29, "test-the-exported-models-in-icefall"], [30, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[28, "modify-the-exported-encoder-for-sherpa-ncnn"], [29, "modify-the-exported-encoder-for-sherpa-ncnn"], [30, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[28, "optional-int8-quantization-with-sherpa-ncnn"], [29, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[29, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[30, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[31, "export-to-onnx"]], "sherpa-onnx": [[31, "sherpa-onnx"]], "Example": [[31, "example"]], "Download the pre-trained model": [[31, "download-the-pre-trained-model"], [38, "download-the-pre-trained-model"], [40, "download-the-pre-trained-model"], [41, "download-the-pre-trained-model"], [43, "download-the-pre-trained-model"], [47, "download-the-pre-trained-model"], [51, "download-the-pre-trained-model"], [52, "download-the-pre-trained-model"], [54, "download-the-pre-trained-model"]], "Export the model to ONNX": [[31, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[31, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[32, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[33, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[33, "how-to-use-the-exported-models"]], "Model export": [[34, "model-export"]], "Finetune from a pre-trained Zipformer model with adapters": [[35, "finetune-from-a-pre-trained-zipformer-model-with-adapters"]], "Model preparation": [[35, "model-preparation"], [36, "model-preparation"]], "Fine-tune with adapter": [[35, "fine-tune-with-adapter"]], "Export the model": [[35, "export-the-model"]], "Finetune from a supervised pre-trained Zipformer model": [[36, "finetune-from-a-supervised-pre-trained-zipformer-model"]], "Fine-tune": [[36, "fine-tune"]], "Fine-tune a pre-trained model": [[37, "fine-tune-a-pre-trained-model"]], "Table of Contents": [[37, null], [66, null]], "Conformer CTC": [[38, "conformer-ctc"], [43, "conformer-ctc"]], "Configurable options": [[38, "configurable-options"], [41, "configurable-options"], [43, "configurable-options"], [46, "configurable-options"], [48, "configurable-options"], [49, "configurable-options"], [60, "configurable-options"], [61, "configurable-options"], [62, "configurable-options"]], "Pre-configured options": [[38, "pre-configured-options"], [41, "pre-configured-options"], [43, "pre-configured-options"], [46, "pre-configured-options"], [48, "pre-configured-options"], [49, "pre-configured-options"], [60, "pre-configured-options"], [61, "pre-configured-options"], [62, "pre-configured-options"]], "Training logs": [[38, "training-logs"], [40, "training-logs"], [41, "training-logs"], [43, "training-logs"], [46, "training-logs"], [48, "training-logs"], [49, "training-logs"], [60, "training-logs"], [61, "training-logs"], [62, "training-logs"]], "Usage examples": [[38, "usage-examples"], [40, "usage-examples"], [41, "usage-examples"], [43, "usage-examples"]], "Case 1": [[38, "case-1"], [40, "case-1"], [41, "case-1"], [43, "case-1"]], "Case 2": [[38, "case-2"], [40, "case-2"], [41, "case-2"], [43, "case-2"]], "Case 3": [[38, "case-3"], [40, "case-3"], [43, "case-3"]], "Pre-trained Model": [[38, "pre-trained-model"], [40, "pre-trained-model"], [41, "pre-trained-model"], [43, "pre-trained-model"], [47, "pre-trained-model"], [51, "pre-trained-model"], [52, "pre-trained-model"], [54, "pre-trained-model"]], "Install kaldifeat": [[38, "install-kaldifeat"], [40, "install-kaldifeat"], [41, "install-kaldifeat"], [43, "install-kaldifeat"], [47, "install-kaldifeat"], [51, "install-kaldifeat"], [52, "install-kaldifeat"]], "Usage": [[38, "usage"], [40, "usage"], [41, "usage"], [43, "usage"]], "CTC decoding": [[38, "ctc-decoding"], [43, "ctc-decoding"], [43, "id2"]], "HLG decoding": [[38, "hlg-decoding"], [38, "id2"], [41, "hlg-decoding"], [43, "hlg-decoding"], [43, "id3"]], "HLG decoding + attention decoder rescoring": [[38, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[38, "colab-notebook"], [40, "colab-notebook"], [41, "colab-notebook"], [43, "colab-notebook"], [47, "colab-notebook"], [51, "colab-notebook"], [52, "colab-notebook"], [54, "colab-notebook"]], "Deployment with C++": [[38, "deployment-with-c"], [43, "deployment-with-c"]], "aishell": [[39, "aishell"]], "Stateless Transducer": [[40, "stateless-transducer"]], "The Model": [[40, "the-model"]], "The Loss": [[40, "the-loss"]], "Todo": [[40, "id1"]], "Greedy search": [[40, "greedy-search"]], "Beam search": [[40, "beam-search"]], "Modified Beam search": [[40, "modified-beam-search"]], "TDNN-LSTM CTC": [[41, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[42, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[43, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[43, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[43, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[43, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[43, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[44, "distillation-with-hubert"]], "Codebook index preparation": [[44, "codebook-index-preparation"]], "LibriSpeech": [[45, "librispeech"], [59, "librispeech"]], "Pruned transducer statelessX": [[46, "pruned-transducer-statelessx"], [61, "pruned-transducer-statelessx"]], "Usage example": [[46, "usage-example"], [48, "usage-example"], [49, "usage-example"], [60, "usage-example"], [61, "usage-example"], [62, "usage-example"]], "Export Model": [[46, "export-model"], [61, "export-model"], [62, "export-model"]], "Export model using torch.jit.script()": [[46, "export-model-using-torch-jit-script"], [48, "export-model-using-torch-jit-script"], [49, "export-model-using-torch-jit-script"], [61, "export-model-using-torch-jit-script"], [62, "export-model-using-torch-jit-script"]], "Download pretrained models": [[46, "download-pretrained-models"], [48, "download-pretrained-models"], [49, "download-pretrained-models"], [60, "download-pretrained-models"], [61, "download-pretrained-models"], [62, "download-pretrained-models"], [64, "download-pretrained-models"], [65, "download-pretrained-models"]], "Deploy with Sherpa": [[46, "deploy-with-sherpa"], [61, "deploy-with-sherpa"], [62, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[47, "tdnn-lstm-ctc"], [52, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[47, "inference-with-a-pre-trained-model"], [51, "inference-with-a-pre-trained-model"], [52, "inference-with-a-pre-trained-model"], [54, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[48, "zipformer-ctc-blank-skip"]], "Export models": [[48, "export-models"], [49, "export-models"], [60, "export-models"], [64, "export-models"], [65, "export-models"]], "Zipformer MMI": [[49, "zipformer-mmi"]], "TIMIT": [[50, "timit"]], "TDNN-LiGRU-CTC": [[51, "tdnn-ligru-ctc"]], "YesNo": [[53, "yesno"]], "TDNN-CTC": [[54, "tdnn-ctc"]], "Download kaldifeat": [[54, "download-kaldifeat"]], "RNN-LM": [[55, "rnn-lm"]], "Train an RNN language model": [[56, "train-an-rnn-language-model"]], "Streaming ASR": [[57, "streaming-asr"]], "Streaming Conformer": [[58, "streaming-conformer"]], "Streaming Emformer": [[58, "streaming-emformer"]], "LSTM Transducer": [[60, "lstm-transducer"]], "Which model to use": [[60, "which-model-to-use"]], "Export model using torch.jit.trace()": [[60, "export-model-using-torch-jit-trace"], [62, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[61, "simulate-streaming-decoding"], [62, "simulate-streaming-decoding"]], "Real streaming decoding": [[61, "real-streaming-decoding"], [62, "real-streaming-decoding"]], "Zipformer Transducer": [[62, "zipformer-transducer"]], "TTS": [[63, "tts"]], "VITS-LJSpeech": [[64, "vits-ljspeech"]], "Install extra dependencies": [[64, "install-extra-dependencies"]], "Build Monotonic Alignment Search": [[64, "build-monotonic-alignment-search"], [65, "build-monotonic-alignment-search"]], "Inference": [[64, "inference"], [65, "inference"]], "Usage in sherpa-onnx": [[64, "usage-in-sherpa-onnx"]], "Install sherpa-onnx": [[64, "install-sherpa-onnx"]], "Download lexicon files": [[64, "download-lexicon-files"]], "Run sherpa-onnx": [[64, "run-sherpa-onnx"]], "VITS-VCTK": [[65, "vits-vctk"]], "Recipes": [[66, "recipes"]]}, "indexentries": {}})
\ No newline at end of file
+Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "decoding-with-langugage-models/LODR", "decoding-with-langugage-models/index", "decoding-with-langugage-models/rescoring", "decoding-with-langugage-models/shallow-fusion", "docker/index", "docker/intro", "faqs", "for-dummies/data-preparation", "for-dummies/decoding", "for-dummies/environment-setup", "for-dummies/index", "for-dummies/model-export", "for-dummies/training", "fst-based-forced-alignment/diff", "fst-based-forced-alignment/index", "fst-based-forced-alignment/k2-based", "fst-based-forced-alignment/kaldi-based", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Finetune/adapter/finetune_adapter", "recipes/Finetune/from_supervised/finetune_zipformer", "recipes/Finetune/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/RNN-LM/index", "recipes/RNN-LM/librispeech/lm-training", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/TTS/index", "recipes/TTS/ljspeech/vits", "recipes/TTS/vctk/vits", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "decoding-with-langugage-models/LODR.rst", "decoding-with-langugage-models/index.rst", "decoding-with-langugage-models/rescoring.rst", "decoding-with-langugage-models/shallow-fusion.rst", "docker/index.rst", "docker/intro.rst", "faqs.rst", "for-dummies/data-preparation.rst", "for-dummies/decoding.rst", "for-dummies/environment-setup.rst", "for-dummies/index.rst", "for-dummies/model-export.rst", "for-dummies/training.rst", "fst-based-forced-alignment/diff.rst", "fst-based-forced-alignment/index.rst", "fst-based-forced-alignment/k2-based.rst", "fst-based-forced-alignment/kaldi-based.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Finetune/adapter/finetune_adapter.rst", "recipes/Finetune/from_supervised/finetune_zipformer.rst", "recipes/Finetune/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/RNN-LM/index.rst", "recipes/RNN-LM/librispeech/lm-training.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/TTS/index.rst", "recipes/TTS/ljspeech/vits.rst", "recipes/TTS/vctk/vits.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "LODR for RNN Transducer", "Decoding with language models", "LM rescoring for Transducer", "Shallow fusion for Transducer", "Docker", "Introduction", "Frequently Asked Questions (FAQs)", "Data Preparation", "Decoding", "Environment setup", "Icefall for dummies tutorial", "Model Export", "Training", "Two approaches", "FST-based forced alignment", "k2-based forced alignment", "Kaldi-based forced alignment", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Finetune from a pre-trained Zipformer model with adapters", "Finetune from a supervised pre-trained Zipformer model", "Fine-tune a pre-trained model", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "RNN-LM", "Train an RNN language model", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "TTS", "VITS-LJSpeech", "VITS-VCTK", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "us": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 38, 39, 40, 41, 43, 44, 47, 51, 52, 54, 56, 58, 64, 65], "tool": [0, 10, 25, 28], "make": [0, 1, 3, 20, 28, 29, 30, 35, 38, 40, 43, 58], "consist": [0, 40, 46, 60, 61, 62], "possibl": [0, 2, 3, 38, 43], "black": 0, "format": [0, 28, 29, 30, 35, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "flake8": 0, "check": [0, 25, 43, 56, 64], "qualiti": [0, 39], "isort": 0, "sort": [0, 25, 56], "import": [0, 9, 10, 15, 20, 25, 28, 61, 62], "The": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 26, 28, 29, 30, 35, 36, 38, 39, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "version": [0, 9, 13, 15, 24, 25, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 51, 52, 61], "abov": [0, 4, 6, 7, 10, 13, 15, 20, 26, 28, 29, 30, 31, 38, 39, 40, 41, 43, 46, 48, 49, 54, 58, 60, 61, 62, 64], "ar": [0, 1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 25, 26, 28, 29, 30, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "22": [0, 9, 15, 20, 25, 28, 29, 35, 43, 51, 52, 54], "3": [0, 4, 6, 7, 9, 10, 11, 15, 20, 24, 26, 27, 31, 34, 35, 41, 44, 46, 47, 48, 49, 54, 56, 60, 61, 62, 64, 65], "0": [0, 1, 4, 6, 7, 9, 11, 13, 15, 20, 24, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "5": [0, 7, 15, 20, 27, 34, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "4": [0, 4, 5, 6, 7, 9, 10, 11, 13, 15, 20, 24, 26, 27, 34, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "10": [0, 7, 9, 15, 20, 24, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "1": [0, 4, 6, 7, 9, 11, 13, 15, 20, 24, 26, 27, 31, 32, 33, 34, 35, 36, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "after": [0, 1, 6, 9, 11, 12, 13, 16, 23, 25, 26, 28, 29, 30, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64], "run": [0, 2, 8, 10, 11, 13, 14, 15, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 65], "command": [0, 1, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 20, 25, 26, 28, 29, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "git": [0, 4, 6, 7, 9, 13, 15, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 51, 52, 54, 56], "clone": [0, 4, 6, 7, 9, 13, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 51, 52, 54, 56], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 13, 15, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "github": [0, 2, 6, 9, 11, 13, 15, 22, 25, 26, 27, 28, 29, 30, 31, 32, 33, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "com": [0, 2, 6, 9, 11, 13, 22, 23, 25, 26, 28, 29, 32, 33, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "k2": [0, 2, 9, 10, 13, 15, 17, 18, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 60, 61, 62, 64], "fsa": [0, 2, 9, 13, 15, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 38, 40, 43, 46, 48, 49, 60, 61, 62, 64], "icefal": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 16, 20, 22, 23, 26, 27, 31, 32, 33, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "cd": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "pip": [0, 1, 6, 10, 13, 15, 25, 28, 31, 40, 64], "instal": [0, 1, 4, 6, 10, 14, 15, 17, 20, 21, 23, 24, 26, 27, 31, 34, 35, 36, 44, 46, 48, 49, 54, 60, 61, 62, 63], "pre": [0, 3, 4, 6, 7, 8, 9, 15, 21, 23, 24, 25, 27, 34, 44, 64, 66], "commit": [0, 25], "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 22, 23, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "automat": [0, 14, 23, 44], "hook": 0, "invok": 0, "fail": [0, 20], "If": [0, 2, 4, 6, 7, 8, 9, 10, 11, 13, 15, 20, 23, 28, 29, 30, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "ani": [0, 4, 6, 7, 13, 20, 25, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61], "your": [0, 1, 2, 4, 6, 7, 9, 11, 13, 20, 21, 23, 24, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64], "wa": [0, 26, 43, 47], "success": [0, 25, 28, 29], "pleas": [0, 1, 2, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 23, 25, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "fix": [0, 9, 10, 13, 28, 29, 30, 43], "issu": [0, 4, 6, 7, 10, 25, 28, 29, 43, 44, 61, 62], "report": [0, 9, 10, 35, 44], "some": [0, 1, 4, 6, 9, 26, 28, 29, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "i": [0, 1, 2, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 23, 25, 26, 27, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64], "e": [0, 2, 4, 5, 6, 7, 13, 20, 28, 29, 30, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "modifi": [0, 20, 27, 34, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "file": [0, 2, 9, 14, 15, 18, 23, 24, 26, 28, 29, 30, 32, 33, 34, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 65], "place": [0, 25, 26, 40, 43, 47], "so": [0, 4, 6, 7, 9, 13, 23, 24, 25, 26, 28, 29, 30, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "statu": 0, "failur": 0, "see": [0, 1, 6, 7, 9, 15, 23, 25, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "which": [0, 2, 4, 6, 7, 9, 11, 12, 15, 17, 23, 25, 26, 28, 29, 30, 31, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 61, 62, 64], "ha": [0, 2, 17, 24, 25, 27, 28, 29, 30, 31, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 58, 60, 61, 62], "been": [0, 25, 27, 28, 29, 30, 40], "befor": [0, 1, 11, 13, 15, 20, 25, 26, 28, 29, 30, 31, 32, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "further": [0, 4, 6, 7, 15], "chang": [0, 4, 6, 7, 10, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "all": [0, 9, 11, 13, 14, 17, 20, 22, 23, 26, 28, 29, 30, 32, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "again": [0, 28, 29, 54], "should": [0, 2, 4, 6, 11, 13, 20, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64], "succe": 0, "thi": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 18, 20, 21, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "time": [0, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "succeed": [0, 20], "want": [0, 4, 6, 7, 11, 13, 15, 25, 26, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "can": [0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "do": [0, 2, 4, 6, 13, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "Or": 0, "without": [0, 4, 6, 7, 9, 15, 17, 20, 21, 23, 38, 43], "your_changed_fil": 0, "py": [0, 2, 4, 6, 7, 9, 10, 11, 12, 13, 15, 16, 20, 25, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "sphinx": 1, "write": [1, 2, 3, 20], "have": [1, 2, 4, 6, 7, 8, 9, 11, 13, 20, 22, 23, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "prepar": [1, 3, 4, 8, 14, 16, 18, 24, 26, 37, 63], "environ": [1, 10, 11, 12, 14, 16, 18, 24, 28, 29, 30, 35, 36, 38, 39, 40, 41, 43, 44, 46, 47, 51, 52, 54, 61, 62], "doc": [1, 26, 58], "r": [1, 13, 20, 25, 28, 29, 30, 51, 52], "requir": [1, 4, 6, 11, 13, 15, 25, 30, 35, 36, 44, 56, 61, 62, 64, 65], "txt": [1, 4, 9, 11, 13, 15, 20, 25, 26, 28, 29, 30, 31, 32, 33, 35, 38, 40, 41, 43, 47, 51, 52, 54, 56, 64, 65], "set": [1, 4, 6, 7, 10, 12, 13, 16, 20, 25, 28, 29, 30, 35, 36, 37, 38, 40, 41, 43, 44, 46, 48, 49, 54, 56, 60, 61, 62], "up": [1, 25, 26, 28, 29, 30, 35, 38, 41, 43, 44, 46, 47, 48, 49, 61, 62], "readi": [1, 20, 38, 43, 44, 56], "refer": [1, 2, 5, 6, 7, 11, 13, 15, 18, 20, 25, 26, 27, 28, 29, 30, 32, 33, 35, 38, 40, 41, 43, 46, 47, 48, 51, 52, 54, 56, 58, 61, 62, 64], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 63], "local": [1, 9, 15, 20, 25, 46, 48, 49, 56, 60, 61, 62], "preview": 1, "what": [1, 2, 11, 15, 20, 25, 28, 29, 30, 40, 58, 64], "look": [1, 2, 4, 6, 7, 14, 20, 22, 25, 28, 29, 30, 38, 40, 41, 43, 44], "like": [1, 2, 9, 11, 20, 23, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 58, 60, 61], "publish": [1, 26, 39], "html": [1, 2, 10, 11, 13, 15, 20, 25, 27, 28, 29, 30, 31, 32, 33, 46, 60, 61, 62, 64], "gener": [1, 6, 9, 14, 15, 18, 26, 28, 29, 30, 31, 32, 33, 37, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62, 64, 65], "view": [1, 8, 24, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61, 62], "follow": [1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "python3": [1, 9, 10, 13, 15, 25, 29, 30], "m": [1, 15, 20, 25, 28, 29, 30, 40, 46, 48, 49, 51, 52, 60, 61, 62], "server": [1, 23, 60], "It": [1, 2, 6, 7, 9, 11, 14, 15, 20, 21, 25, 27, 28, 29, 30, 31, 32, 33, 35, 38, 39, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "print": [1, 12, 16, 20, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "serv": [1, 46, 48, 49, 60, 61, 62], "port": [1, 14, 17, 35, 36, 44, 46, 48, 49, 60, 61, 62], "8000": [1, 11, 15, 54], "open": [1, 4, 6, 7, 9, 20, 24, 26, 28, 29, 30, 39, 40, 43, 44], "browser": [1, 20, 21, 23, 46, 48, 49, 60, 61, 62], "go": [1, 7, 38, 40, 43, 46, 48, 49, 60, 61, 62, 64], "read": [2, 11, 15, 20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "code": [2, 3, 8, 10, 13, 15, 17, 20, 24, 25, 28, 29, 30, 38, 43, 44, 46, 47, 51, 52, 54, 58, 61, 62], "style": [2, 3, 24], "adjust": [2, 56, 64, 65], "design": 2, "python": [2, 9, 13, 15, 17, 25, 26, 28, 29, 30, 31, 32, 33, 38, 40, 43, 46, 48, 49, 56, 60, 61, 62, 64, 65], "recommend": [2, 6, 7, 9, 25, 35, 36, 38, 40, 41, 43, 44, 46, 61, 62], "test": [2, 4, 9, 15, 18, 24, 26, 27, 34, 35, 36, 38, 40, 41, 43, 44, 47, 48, 51, 52, 56, 64, 65], "valid": [2, 25, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "dataset": [2, 10, 11, 13, 14, 25, 26, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "lhots": [2, 9, 11, 13, 15, 24, 26, 28, 29, 30, 38, 40, 43], "readthedoc": [2, 11, 25], "io": [2, 9, 11, 13, 15, 25, 27, 28, 29, 30, 31, 32, 33, 46, 60, 61, 62, 64], "en": [2, 11, 25, 28], "latest": [2, 9, 11, 13, 23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "index": [2, 25, 27, 28, 29, 30, 31, 32, 33, 60, 61, 62], "yesno": [2, 8, 10, 11, 12, 13, 14, 15, 16, 24, 25, 42, 54, 66], "veri": [2, 3, 7, 13, 28, 29, 30, 35, 36, 40, 51, 52, 54, 61, 62], "good": [2, 7], "exampl": [2, 11, 13, 23, 24, 26, 28, 29, 30, 32, 33, 34, 44, 47, 51, 52, 54], "speech": [2, 11, 13, 14, 23, 24, 25, 27, 37, 39, 40, 54, 64, 65, 66], "pull": [2, 4, 6, 7, 9, 28, 29, 30, 31, 35, 36, 38, 40, 43, 56, 58], "380": [2, 28, 52], "show": [2, 4, 6, 7, 9, 15, 23, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "add": [2, 11, 28, 29, 30, 38, 40, 41, 61, 66], "new": [2, 3, 9, 13, 23, 25, 28, 29, 30, 35, 36, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 54, 60, 61, 62], "suppos": [2, 9, 61, 62], "would": [2, 11, 26, 28, 29, 30, 43, 47, 61, 62], "name": [2, 9, 10, 13, 15, 26, 28, 29, 30, 31, 38, 40, 46, 48, 49, 56, 61, 62], "foo": [2, 33, 38, 43, 46, 48, 49, 60, 61, 62], "eg": [2, 9, 10, 11, 12, 15, 16, 20, 22, 25, 26, 28, 29, 30, 31, 32, 33, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "mkdir": [2, 9, 28, 29, 38, 40, 41, 43, 47, 51, 52, 54], "p": [2, 4, 13, 20, 25, 28, 29, 40, 51, 52], "asr": [2, 4, 6, 7, 9, 10, 11, 12, 14, 15, 16, 20, 22, 24, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 66], "touch": 2, "sh": [2, 9, 11, 25, 26, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "chmod": 2, "x": [2, 4, 20, 30, 58], "simpl": [2, 12, 14, 16, 25, 40, 56], "own": [2, 11, 35, 36, 44, 46, 56, 61, 62], "otherwis": [2, 28, 29, 30, 36, 38, 40, 43, 44, 46, 48, 49, 60, 61, 62], "librispeech": [2, 4, 6, 7, 10, 20, 22, 24, 26, 28, 29, 30, 31, 32, 33, 35, 36, 42, 43, 44, 46, 47, 48, 49, 56, 57, 58, 60, 61, 62, 66], "assum": [2, 4, 15, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 56, 60, 61, 62], "fanci": 2, "call": [2, 10, 31, 44, 56], "bar": [2, 33, 38, 43, 46, 48, 49, 60, 61, 62], "organ": 2, "wai": [2, 3, 15, 34, 46, 48, 49, 58, 60, 61, 62], "readm": [2, 38, 40, 41, 43, 47, 51, 52, 54], "md": [2, 22, 26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "asr_datamodul": [2, 9, 10, 15, 25], "pretrain": [2, 4, 6, 7, 15, 26, 28, 29, 30, 31, 33, 35, 36, 38, 40, 41, 43, 47, 51, 52, 54, 63], "For": [2, 4, 6, 7, 9, 10, 14, 20, 22, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "instanc": [2, 9, 10, 12, 16, 22, 28, 29, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "tdnn": [2, 9, 10, 12, 15, 16, 25, 39, 42, 45, 50, 53], "its": [2, 4, 20, 26, 27, 28, 29, 30, 33, 40, 48, 56], "directori": [2, 9, 11, 13, 24, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "structur": [2, 30], "descript": [2, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "contain": [2, 8, 11, 13, 14, 15, 20, 24, 26, 27, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 66], "inform": [2, 4, 6, 11, 12, 16, 25, 26, 38, 40, 41, 43, 46, 47, 48, 51, 52, 54, 58, 60, 61, 62], "g": [2, 4, 5, 6, 7, 11, 13, 20, 25, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "wer": [2, 5, 9, 12, 15, 25, 26, 35, 36, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "etc": [2, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "provid": [2, 11, 15, 23, 25, 26, 27, 28, 29, 30, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 66], "pytorch": [2, 10, 13, 20, 25, 28, 29, 30, 40], "dataload": [2, 25], "take": [2, 7, 9, 26, 44, 46, 54, 56, 61, 62, 64, 65], "input": [2, 26, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 54, 58], "checkpoint": [2, 4, 6, 7, 12, 15, 20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "save": [2, 15, 16, 25, 26, 29, 30, 32, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "dure": [2, 4, 5, 7, 10, 13, 20, 23, 26, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "stage": [2, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "": [2, 4, 6, 7, 9, 14, 15, 16, 20, 25, 26, 28, 29, 30, 31, 32, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "definit": [2, 28, 29], "neural": [2, 4, 6, 7, 35, 38, 43, 56], "network": [2, 35, 38, 40, 43, 46, 48, 49, 56, 60, 61, 62], "script": [2, 6, 7, 13, 14, 24, 25, 33, 34, 38, 40, 41, 43, 44, 47, 51, 52, 54, 56, 60], "infer": [2, 26, 28, 29, 63], "tdnn_lstm_ctc": [2, 41, 47, 52], "conformer_ctc": [2, 38, 43], "get": [2, 9, 13, 14, 15, 18, 23, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 54, 58, 60, 61, 62, 64], "feel": [2, 44, 56, 60], "result": [2, 4, 7, 9, 16, 22, 23, 26, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "everi": [2, 26, 46, 48, 49, 60, 61, 62], "kept": [2, 46, 61, 62], "self": [2, 27, 30, 58], "toler": 2, "duplic": 2, "among": [2, 25], "differ": [2, 9, 12, 18, 25, 28, 29, 30, 31, 35, 36, 37, 38, 39, 43, 44, 46, 58, 60, 61, 62, 64], "invoc": [2, 28, 29], "help": [2, 12, 14, 16, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "blob": [2, 11, 22, 25, 26, 33, 46, 48, 49, 60, 61, 62], "master": [2, 6, 9, 11, 15, 22, 25, 26, 29, 30, 32, 33, 35, 36, 40, 44, 46, 48, 49, 60, 61, 62], "transform": [2, 6, 7, 38, 43, 60], "conform": [2, 32, 39, 40, 42, 45, 46, 48, 60, 61, 62], "base": [2, 4, 7, 13, 17, 24, 30, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62], "lstm": [2, 27, 33, 34, 39, 42, 45, 50, 57, 59], "attent": [2, 30, 40, 41, 44, 58, 61, 62], "lm": [2, 4, 5, 7, 9, 11, 24, 25, 40, 46, 47, 51, 52, 54, 56, 61, 62, 66], "rescor": [2, 5, 24, 41, 47, 49, 51, 52, 54, 56], "demonstr": [2, 14, 15, 21, 23, 26, 31, 35], "consid": [2, 4, 30, 36], "colab": [2, 20, 25], "notebook": [2, 20, 25], "welcom": 3, "There": [3, 4, 15, 28, 29, 30, 31, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "mani": [3, 12, 25, 61, 62], "two": [3, 4, 11, 14, 15, 18, 20, 24, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 65], "them": [3, 5, 6, 21, 22, 23, 28, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "To": [3, 4, 5, 6, 7, 9, 11, 15, 20, 23, 25, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "document": [3, 24, 26, 27, 28, 29, 30, 31, 49, 64], "repositori": [3, 9, 28, 29, 30, 31], "recip": [3, 4, 6, 7, 9, 11, 15, 22, 24, 25, 26, 31, 35, 36, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "In": [3, 4, 6, 10, 15, 23, 25, 26, 28, 29, 30, 31, 32, 33, 34, 37, 38, 40, 41, 43, 44, 47, 51, 52, 54, 58], "page": [3, 23, 32, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 66], "describ": [3, 5, 8, 9, 17, 18, 20, 21, 26, 28, 29, 31, 32, 33, 34, 38, 40, 41, 43, 46, 47, 51, 52, 61, 62, 64], "how": [3, 4, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 20, 21, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "creat": [3, 4, 6, 7, 14, 15, 18, 24, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61], "data": [3, 4, 6, 7, 8, 13, 14, 15, 16, 18, 24, 26, 28, 29, 30, 31, 32, 33, 37, 39, 56, 63], "train": [3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 18, 20, 21, 23, 24, 26, 27, 32, 33, 34, 55, 58, 63, 66], "decod": [3, 4, 8, 10, 11, 14, 15, 17, 20, 23, 24, 28, 29, 30, 33, 34, 36, 37, 56], "model": [3, 4, 6, 7, 9, 11, 12, 14, 17, 18, 20, 21, 23, 24, 25, 27, 44, 55, 58, 63, 66], "As": [4, 5, 6, 7, 28, 40, 43, 44, 56], "type": [4, 6, 7, 9, 11, 15, 20, 25, 26, 28, 29, 30, 38, 40, 43, 46, 48, 49, 54, 58, 60, 61, 62, 64], "e2": [4, 7, 25, 56], "usual": [4, 6, 7, 12, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62, 64, 65], "an": [4, 5, 6, 7, 9, 11, 13, 15, 18, 23, 24, 25, 26, 28, 29, 30, 31, 32, 33, 38, 39, 40, 43, 44, 46, 49, 54, 55, 60, 61, 62, 64, 65, 66], "intern": [4, 5], "languag": [4, 7, 11, 23, 24, 38, 40, 41, 55, 64, 66], "learn": [4, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "level": [4, 5, 15, 20, 56], "corpu": [4, 6, 7, 39, 56], "real": 4, "life": 4, "scenario": 4, "often": [4, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "mismatch": [4, 36, 61], "between": [4, 7, 18, 35, 46, 61, 62], "target": [4, 23, 25, 35], "space": [4, 21, 24, 56], "problem": [4, 6, 7, 25, 44], "when": [4, 6, 9, 10, 15, 23, 28, 29, 30, 34, 40, 43, 44, 46, 48, 49, 56, 61, 62], "act": 4, "against": [4, 25], "extern": [4, 5, 6, 7], "tutori": [4, 5, 6, 7, 13, 15, 18, 20, 24, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 56, 60, 61, 62, 64, 65], "low": [4, 28, 29, 37, 64], "order": [4, 13, 17, 25, 28, 29, 30, 38, 41, 43, 47, 51, 52], "densiti": 4, "ratio": [4, 20], "allevi": 4, "effect": [4, 7, 30, 35], "improv": [4, 5, 6, 7, 35, 37, 40, 56], "perform": [4, 6, 7, 18, 27, 35, 36, 37, 40, 44, 61], "languga": 4, "integr": [4, 23, 35], "pruned_transducer_stateless7_stream": [4, 6, 7, 30, 31, 62], "stream": [4, 6, 7, 15, 17, 24, 27, 28, 29, 31, 34, 38, 43, 51, 52, 60, 66], "howev": [4, 6, 7, 26, 29, 37, 44], "easili": [4, 6, 7, 35, 38, 41, 43], "appli": [4, 6, 7, 40, 58], "other": [4, 7, 9, 13, 14, 15, 20, 26, 29, 30, 31, 35, 40, 43, 44, 46, 47, 51, 52, 54, 58, 61, 62, 66], "encount": [4, 6, 7, 10, 25, 30, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "here": [4, 6, 7, 20, 26, 28, 29, 30, 38, 40, 41, 43, 44, 47, 58, 61], "simplic": [4, 6, 7], "same": [4, 6, 7, 20, 25, 26, 28, 29, 30, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "domain": [4, 6, 7, 35, 36, 37], "gigaspeech": [4, 6, 7, 22, 32, 35, 36, 60], "first": [4, 6, 9, 10, 11, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "let": [4, 6, 7, 14, 25, 28, 29, 30, 35, 36, 38, 43, 56], "background": 4, "predecessor": 4, "dr": 4, "propos": [4, 40, 58, 62], "address": [4, 9, 15, 23, 25, 26, 28, 29, 30, 40, 46, 49, 60, 61, 62], "sourc": [4, 11, 13, 25, 26, 28, 29, 30, 38, 39, 40, 43], "acoust": [4, 61, 62], "similar": [4, 5, 36, 44, 48, 61, 62], "deriv": 4, "formula": 4, "bay": 4, "theorem": 4, "text": [4, 6, 7, 11, 16, 20, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "score": [4, 5, 7, 38, 43, 46, 61, 62], "left": [4, 28, 30, 35, 40, 61, 62], "y_u": 4, "mathit": 4, "y": [4, 20], "right": [4, 28, 40, 58, 61], "log": [4, 9, 10, 12, 15, 16, 25, 28, 29, 30, 35, 47, 51, 52, 54, 64, 65], "y_": 4, "u": [4, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 54], "lambda_1": 4, "p_": 4, "lambda_2": 4, "where": [4, 9, 10, 61], "weight": [4, 15, 38, 41, 43, 48, 49, 56, 60], "respect": 4, "onli": [4, 6, 8, 11, 13, 14, 15, 20, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "compar": [4, 17, 28, 29, 30, 36, 61], "shallow": [4, 5, 24, 56], "fusion": [4, 5, 24, 56], "subtract": [4, 5], "work": [4, 9, 13, 15, 28, 29, 30, 43], "treat": [4, 29, 30], "predictor": 4, "joiner": [4, 28, 29, 30, 31, 33, 35, 36, 40, 46, 60, 61, 62], "weak": 4, "captur": 4, "therefor": [4, 10], "n": [4, 5, 6, 11, 20, 25, 38, 44, 46, 48, 49, 51, 52, 60, 61, 62], "gram": [4, 6, 25, 38, 40, 41, 46, 47, 49, 51, 52, 61, 62], "approxim": [4, 5], "ilm": 4, "lead": [4, 7, 12], "rnnt": [4, 46, 61, 62], "bi": [4, 6], "addit": [4, 37], "estim": 4, "li": 4, "choic": 4, "accord": [4, 56], "origin": [4, 5, 35, 36, 37], "paper": [4, 5, 35, 44, 46, 60, 61, 62, 64, 65], "achiev": [4, 6, 7, 35, 36, 56, 58], "both": [4, 36, 46, 48, 49, 58, 60, 61, 62], "intra": 4, "cross": 4, "much": [4, 28, 29, 35, 36], "faster": [4, 6, 35, 64], "evalu": 4, "now": [4, 6, 9, 13, 15, 20, 25, 28, 29, 30, 38, 43, 44, 46, 47, 48, 49, 51, 52, 56, 60, 61, 62], "illustr": [4, 6, 7, 35, 36, 56], "purpos": [4, 6, 7, 28, 29, 35, 36, 56], "from": [4, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 20, 21, 23, 24, 25, 26, 28, 29, 30, 31, 37, 38, 39, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65, 66], "link": [4, 6, 7, 22, 25, 26, 27, 46, 48, 49, 60, 61, 62, 64, 65], "scratch": [4, 6, 7, 36, 46, 48, 49, 56, 60, 61, 62, 64, 65], "prune": [4, 6, 7, 26, 30, 31, 40, 42, 44, 45, 57, 58, 59, 60, 62], "statelessx": [4, 6, 7, 42, 44, 45, 57, 58, 59], "initi": [4, 6, 7, 9, 35, 36, 38, 41], "step": [4, 6, 7, 11, 14, 20, 25, 26, 28, 29, 30, 36, 38, 40, 41, 43, 44, 46, 48, 49, 54, 56, 60, 61, 62], "download": [4, 6, 7, 8, 10, 13, 15, 20, 23, 24, 27, 34, 35, 36, 39, 44, 56, 63], "git_lfs_skip_smudg": [4, 6, 7, 28, 29, 30, 31, 35, 36, 56], "huggingfac": [4, 6, 7, 13, 22, 24, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 48, 49, 51, 52, 54, 56, 60, 64, 65], "co": [4, 6, 7, 22, 23, 25, 26, 28, 29, 30, 31, 35, 36, 38, 39, 40, 41, 43, 47, 48, 49, 51, 52, 54, 56, 60, 64, 65], "zengwei": [4, 6, 7, 28, 30, 31, 35, 36, 49, 56, 60, 64], "stateless7": [4, 6, 7, 30, 31], "2022": [4, 6, 7, 26, 28, 29, 30, 31, 40, 46, 48, 49, 60, 61], "12": [4, 6, 7, 9, 14, 20, 25, 26, 28, 29, 30, 31, 35, 38, 40, 41, 43, 46, 48, 49, 51, 54, 60, 61, 62, 64, 65], "29": [4, 6, 7, 20, 25, 30, 31, 38, 40, 41, 43, 47, 48, 51, 52], "exp": [4, 6, 7, 9, 15, 16, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "lf": [4, 6, 7, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 47, 49, 51, 52, 54, 56], "includ": [4, 6, 7, 28, 29, 30, 31, 35, 36, 46, 48, 49, 56, 60, 61, 62], "pt": [4, 6, 7, 9, 11, 15, 20, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "ln": [4, 6, 7, 9, 15, 26, 28, 29, 30, 31, 35, 36, 38, 43, 46, 48, 49, 56, 60, 61, 62], "epoch": [4, 6, 7, 9, 12, 15, 16, 25, 26, 28, 29, 30, 31, 32, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "99": [4, 6, 7, 15, 20, 25, 28, 29, 30, 31, 35, 36], "symbol": [4, 5, 6, 7, 20, 25, 40, 46, 61, 62], "load": [4, 6, 7, 9, 15, 20, 25, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "lang_bpe_500": [4, 6, 7, 26, 28, 29, 30, 31, 32, 33, 35, 36, 43, 46, 48, 49, 56, 60, 61, 62], "bpe": [4, 5, 6, 7, 26, 28, 29, 30, 31, 33, 35, 36, 43, 46, 48, 49, 56, 60, 61, 62], "done": [4, 6, 7, 9, 13, 15, 25, 26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "via": [4, 6, 7, 14, 25, 27, 32, 33, 34, 35, 36, 56], "exp_dir": [4, 6, 7, 9, 15, 25, 28, 29, 30, 40, 43, 44, 46, 48, 49, 61, 62], "avg": [4, 6, 7, 9, 12, 15, 25, 26, 28, 29, 30, 31, 32, 33, 35, 36, 40, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "averag": [4, 6, 7, 9, 12, 15, 25, 26, 28, 29, 30, 31, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "fals": [4, 6, 7, 9, 15, 20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 43, 44], "dir": [4, 6, 7, 20, 26, 28, 29, 30, 31, 32, 33, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "max": [4, 6, 7, 25, 26, 28, 29, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62, 64, 65], "durat": [4, 6, 7, 11, 26, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "600": [4, 6, 7, 25, 26, 35, 43, 46, 48, 60, 61, 62], "chunk": [4, 6, 7, 28, 30, 31, 35, 61, 62], "len": [4, 6, 7, 20, 30, 31, 62], "32": [4, 6, 7, 20, 25, 28, 29, 30, 31, 35, 38, 40, 41, 62], "method": [4, 5, 7, 15, 23, 26, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 60, 61, 62], "modified_beam_search": [4, 5, 6, 7, 23, 40, 44, 46, 48, 60, 61, 62], "clean": [4, 9, 15, 25, 30, 35, 38, 40, 43, 44, 46, 47, 48, 49, 60, 61, 62], "beam_size_4": [4, 6, 7], "11": [4, 6, 7, 9, 10, 11, 15, 20, 25, 28, 29, 31, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "best": [4, 5, 6, 7, 28, 29, 30, 35, 36, 38, 41, 43], "7": [4, 6, 7, 9, 20, 25, 26, 27, 30, 34, 38, 41, 43, 46, 47, 51, 52, 60, 61], "93": [4, 6, 7, 15, 20], "Then": [4, 6], "necessari": [4, 44, 56], "note": [4, 5, 6, 7, 10, 11, 15, 17, 20, 26, 28, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "960": [4, 35, 36, 43, 46, 48, 49, 60, 61, 62], "hour": [4, 13, 35, 36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "ezerhouni": [4, 6, 7], "pushd": [4, 6, 7, 31], "popd": [4, 6, 7, 31], "marcoyang": [4, 6], "librispeech_bigram": [4, 6], "2gram": [4, 6], "fst": [4, 11, 17, 24, 25, 40, 54], "modified_beam_search_lm_lodr": 4, "lm_dir": [4, 6, 7, 9, 25, 43], "lm_scale": [4, 6, 7], "42": [4, 9, 15, 20, 25, 29, 35, 38, 43, 54], "lodr_scal": 4, "24": [4, 9, 10, 13, 15, 20, 25, 28, 29, 41, 47, 51, 52, 54], "modified_beam_search_lodr": [4, 5, 6], "scale": [4, 6, 7, 28, 29, 38, 43, 44, 47, 49, 51, 52], "embed": [4, 6, 7, 40, 46, 56, 60, 61, 62], "dim": [4, 6, 7, 28, 29, 30, 35, 40, 46, 56, 61], "2048": [4, 6, 7, 26, 28, 29, 30, 40, 56], "hidden": [4, 6, 7, 29, 56, 60], "num": [4, 6, 7, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62, 64, 65], "layer": [4, 6, 7, 28, 29, 30, 35, 40, 44, 46, 56, 58, 60, 61, 62], "vocab": [4, 6, 7, 43], "500": [4, 6, 7, 26, 28, 29, 30, 40, 43, 49, 60, 64, 65], "token": [4, 11, 20, 26, 28, 29, 30, 31, 32, 33, 35, 38, 40, 41, 43, 47, 51, 52, 54, 56, 64, 65], "ngram": [4, 43, 47, 51, 52], "2": [4, 6, 7, 9, 11, 13, 15, 20, 24, 26, 27, 34, 35, 36, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "extra": [4, 28, 29, 30, 40, 58, 61, 63], "argument": [4, 7, 15, 35, 36, 44, 58], "need": [4, 6, 11, 13, 14, 15, 17, 20, 23, 25, 26, 27, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62], "given": [4, 9, 11, 12, 13, 15, 20, 25, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 61, 62, 64, 65], "specifi": [4, 7, 10, 12, 15, 16, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "neg": [4, 40], "number": [4, 7, 16, 23, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "obtain": [4, 7, 38, 40, 41, 43, 47, 51, 52], "shown": [4, 7, 35], "below": [4, 7, 9, 11, 12, 13, 14, 15, 16, 20, 25, 28, 29, 30, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 64], "61": [4, 6, 20], "6": [4, 6, 7, 9, 10, 11, 15, 20, 27, 34, 38, 40, 43, 46, 47, 51, 52, 60, 65], "74": [4, 6, 20, 25, 26], "recal": 4, "lowest": [4, 12, 15, 46, 48, 49, 60, 61, 62], "77": [4, 6, 7, 20, 25, 43], "08": [4, 6, 7, 9, 15, 20, 30, 43, 47, 49, 51, 52, 54, 60], "inde": 4, "even": [4, 23, 25, 29], "better": [4, 6], "increas": [4, 6, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "8": [4, 6, 7, 9, 10, 15, 20, 25, 26, 28, 29, 30, 35, 38, 40, 43, 44, 46, 47, 48, 49, 54, 60, 61, 62], "45": [4, 6, 15, 20, 25, 28, 30, 38, 40, 43], "38": [4, 6, 20, 25, 28, 38, 40, 43, 51], "23": [4, 6, 9, 10, 11, 15, 20, 25, 28, 29, 30, 35, 38, 40, 41, 43, 51, 52, 54], "section": [5, 8, 9, 10, 18, 20, 21, 25, 26, 31, 32, 33, 34, 38, 43], "langugag": 5, "transduc": [5, 24, 26, 27, 31, 34, 35, 36, 39, 42, 44, 45, 56, 57, 58, 59], "rnn": [5, 6, 7, 24, 29, 40, 46, 48, 60, 61, 62, 66], "avail": [5, 6, 8, 15, 24, 25, 26, 28, 29, 30, 36, 37, 38, 40, 43, 47, 51, 52, 54, 60], "beam": [5, 26, 60], "search": [5, 6, 7, 22, 23, 63], "realli": [5, 38, 41, 43, 46, 48, 49, 60, 61, 62], "valu": [5, 7, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "t": [5, 13, 14, 15, 17, 20, 25, 28, 29, 30, 31, 32, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "doe": [5, 15, 17, 20, 28, 29, 30, 38, 40, 43, 54], "modified_beam_search_lm_shallow_fus": [5, 6, 7], "interpol": 5, "also": [5, 6, 7, 11, 13, 14, 15, 21, 22, 25, 26, 27, 28, 29, 30, 31, 33, 35, 38, 40, 41, 43, 46, 48, 49, 54, 56, 58, 60, 61, 62, 64], "known": 5, "bigram": 5, "backoff": 5, "modified_beam_search_lm_rescor": [5, 6], "hypothes": [5, 6], "rnnlm": [5, 6, 56], "re": [5, 6, 10, 38, 41, 43, 44, 46, 48, 49, 58, 60, 61, 62], "rank": [5, 6], "modified_beam_search_lm_rescore_lodr": [5, 6], "lodr": [5, 24, 56], "commonli": [6, 7, 38, 40, 41, 43, 47, 51, 52, 54], "approach": [6, 18, 20, 24], "incorpor": 6, "unlik": 6, "more": [6, 14, 25, 28, 29, 30, 35, 38, 43, 44, 54, 56, 58, 60, 61, 64, 65], "effici": [6, 7, 35, 46, 61, 62], "than": [6, 25, 26, 29, 35, 38, 40, 41, 43, 46, 47, 48, 49, 54, 60, 61, 62], "sinc": [6, 13, 20, 25, 28, 29, 30, 36, 44, 54, 60], "less": [6, 26, 35, 43, 47, 54, 61, 62], "comput": [6, 15, 18, 25, 26, 28, 29, 30, 38, 40, 41, 44, 46, 47, 49, 51, 52, 54, 60, 61, 62], "gpu": [6, 7, 8, 13, 14, 24, 25, 28, 29, 35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 51, 52, 54, 60, 61, 62], "try": [6, 10, 12, 15, 21, 23, 44, 46, 48, 49, 60, 61, 62], "might": [6, 7, 29, 30, 61, 62], "ideal": [6, 7], "mai": [6, 7, 9, 25, 28, 29, 30, 36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62, 66], "With": [6, 25], "43": [6, 9, 20, 29, 30, 43], "great": 6, "made": [6, 28], "boost": [6, 7], "tabl": [6, 17, 23, 28, 29, 30], "67": [6, 20, 25], "59": [6, 15, 20, 25, 28, 41, 43], "86": [6, 20], "fact": 6, "arpa": [6, 11, 54], "performn": 6, "depend": [6, 14, 15, 17, 25, 38, 43, 63], "kenlm": 6, "kpu": 6, "archiv": [6, 56], "zip": 6, "execut": [6, 7, 13, 28, 35, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "9": [6, 9, 20, 25, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 54, 60, 61, 62], "57": [6, 20, 25, 29, 43, 47], "slightli": 6, "63": [6, 20, 40], "04": [6, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52], "52": [6, 20, 25, 38, 43], "73": [6, 20], "mention": [6, 58], "earlier": 6, "benchmark": [6, 40], "speed": [6, 28, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "132": [6, 20], "95": [6, 20, 39], "177": [6, 25, 26, 29, 30, 40, 41, 43], "96": [6, 20, 25, 35], "210": [6, 51, 52], "262": [6, 7, 15], "62": [6, 7, 20, 25, 43, 47], "65": [6, 7, 20, 25, 28], "352": [6, 7, 43], "58": [6, 7, 10, 20, 25, 43], "488": [6, 7, 28, 29, 30], "400": [6, 9, 39], "610": 6, "870": 6, "156": [6, 15, 20], "203": [6, 15, 26, 43], "255": [6, 29, 30], "160": [6, 15, 20], "263": [6, 9, 15, 25, 29], "singl": [6, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "32g": 6, "v100": [6, 38, 40, 41, 43], "vari": 6, "word": [7, 11, 12, 15, 18, 38, 40, 41, 43, 47, 51, 52, 54, 56], "error": [7, 9, 10, 12, 13, 15, 25, 28, 29, 30, 43], "rate": [7, 12, 20, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "These": [7, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "alreadi": [7, 11, 13, 25, 26, 37], "But": [7, 28, 46, 48, 49, 60, 61, 62], "long": [7, 28, 56, 64, 65], "true": [7, 9, 15, 25, 26, 28, 29, 30, 35, 36, 38, 40, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "either": [7, 15, 23, 38, 40, 41, 43, 61, 62], "choos": [7, 23, 25, 44, 46, 48, 49, 60, 61, 62], "three": [7, 15, 28, 29, 30, 33, 38, 40, 58], "associ": 7, "dimens": [7, 35, 46, 56, 61, 62], "obviou": 7, "rel": [7, 37], "reduct": [7, 15, 25, 28, 29, 48], "around": [7, 36], "A": [7, 14, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 60, 61, 62], "few": [7, 11, 28, 29, 30, 44], "paramet": [7, 14, 26, 28, 29, 30, 32, 35, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 56, 60, 61, 62, 64, 65], "tune": [7, 24, 28, 29, 30, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62, 66], "control": [7, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "too": 7, "small": [7, 35, 36, 37, 40, 51, 52, 54], "fulli": 7, "util": [7, 9, 10, 15, 20, 25, 43], "larg": [7, 13], "domin": 7, "bad": 7, "typic": [7, 35, 38, 40, 41, 43], "activ": [7, 13, 23, 25], "path": [7, 9, 15, 23, 25, 26, 28, 29, 30, 33, 36, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "trade": 7, "off": [7, 28], "accuraci": [7, 28, 29, 37, 39], "larger": [7, 29, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "slower": 7, "built": [8, 9, 25, 64], "imag": [8, 24], "cpu": [8, 12, 13, 14, 15, 16, 17, 20, 24, 25, 26, 28, 29, 30, 32, 38, 46, 48, 49, 54, 61, 62, 64], "still": [8, 28, 29, 30, 37], "introduct": [8, 24, 57, 66], "tag": [8, 24], "cuda": [8, 10, 15, 17, 20, 24, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 60, 61, 62], "enabl": [8, 25, 44], "within": [8, 14, 21, 23, 24, 28, 29], "updat": [8, 28, 29, 30, 35], "host": [9, 26], "hub": [9, 20], "k2fsa": 9, "find": [9, 10, 16, 21, 22, 23, 26, 28, 29, 30, 33, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "dockerfil": 9, "tree": [9, 11, 32, 33, 38, 40, 41, 43, 47, 51, 52, 54, 60], "item": [9, 14, 20], "curl": 9, "registri": 9, "v2": [9, 30, 38, 43], "jq": 9, "give": [9, 11, 15, 20, 40], "someth": [9, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "torch2": [9, 13, 15], "cuda12": 9, "cuda11": [9, 10, 25], "torch1": [9, 10, 25], "cuda10": 9, "13": [9, 10, 15, 20, 25, 26, 28, 29, 30, 36, 40, 41, 43, 47, 48, 51], "releas": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 64], "torch": [9, 10, 13, 14, 20, 24, 26, 27, 34, 38, 40, 43], "select": [9, 12, 13, 14, 23, 25, 28, 29, 30, 46, 47, 51, 52, 54, 60, 61, 62], "appropri": [9, 25], "combin": [9, 12, 28, 29, 30], "visit": [9, 22, 23, 46, 48, 49, 60, 61, 62, 64, 65], "pkg": 9, "py3": [9, 10, 25], "v1": [9, 38, 41, 43, 47, 51, 52], "current": [9, 23, 28, 29, 40, 44, 58, 60, 61, 62, 64, 65, 66], "ghcr": 9, "alwai": [9, 25, 26], "sudo": [9, 38, 41], "rm": 9, "bin": [9, 13, 25, 28, 29, 30, 38, 43], "bash": 9, "start": [9, 11, 12, 14, 15, 16, 20, 23, 25, 26, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "interfac": 9, "present": [9, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "root": [9, 20, 28, 29, 30, 56], "60c947eac59c": 9, "workspac": 9, "export": [9, 10, 11, 12, 13, 14, 16, 24, 25, 37, 38, 40, 41, 43, 44, 47, 51, 52, 54, 63], "pythonpath": [9, 11, 12, 13, 15, 16, 25, 28, 29, 30], "user": [9, 10, 13, 25], "copi": [9, 20, 25, 58], "switch": [9, 25, 28, 29, 30, 38, 43, 49], "opt": 9, "conda": [9, 10], "lib": [9, 10, 15, 25, 30], "site": [9, 10, 15, 25, 30], "packag": [9, 10, 15, 25, 30, 64, 65], "__init__": [9, 10, 15, 25, 26, 28, 29, 30, 38, 40, 43], "line": [9, 10, 11, 28, 29, 30, 46, 56, 61, 62], "modul": [9, 13, 24, 28, 30, 35, 48, 61], "_k2": [9, 10, 25], "determinizeweightpushingtyp": [9, 10], "importerror": [9, 24], "libcuda": 9, "cannot": [9, 24, 28, 29, 30], "share": [9, 24, 25], "object": [9, 24, 25, 38, 40, 41, 46, 54, 60, 61], "No": [9, 13, 17, 24, 28, 29, 30, 54], "stub": 9, "list": [9, 15, 20, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52], "16": [9, 15, 20, 25, 26, 28, 29, 30, 33, 35, 38, 40, 41, 43, 46, 47, 51, 52, 54, 60, 61, 62], "second": [9, 14, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "2023": [9, 15, 25, 28, 29, 30, 35, 36, 48, 56, 65], "01": [9, 11, 15, 25, 28, 40, 41, 43, 44, 48], "02": [9, 11, 25, 26, 28, 29, 30, 35, 40, 43, 46, 52, 60, 61, 64], "06": [9, 15, 25, 26, 28, 35, 36, 41, 43, 47, 54], "info": [9, 15, 25, 26, 28, 29, 30, 35, 38, 40, 41, 43, 47, 51, 52, 54], "264": [9, 25, 30], "posixpath": [9, 15, 25, 28, 29, 30, 40, 43], "lang_dir": [9, 15, 25, 40, 43], "lang_phon": [9, 11, 15, 25, 41, 47, 51, 52, 54], "feature_dim": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 54], "search_beam": [9, 15, 25, 38, 43, 54], "20": [9, 14, 15, 20, 25, 26, 28, 30, 35, 36, 38, 40, 41, 43, 46, 47, 51, 52, 54, 56, 61], "output_beam": [9, 15, 25, 38, 43, 54], "min_active_st": [9, 15, 25, 38, 43, 54], "30": [9, 10, 15, 20, 25, 28, 29, 30, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "max_active_st": [9, 15, 25, 38, 43, 54], "10000": [9, 15, 25, 38, 43, 54], "use_double_scor": [9, 15, 25, 38, 43, 54], "14": [9, 10, 15, 20, 25, 26, 28, 29, 32, 38, 43, 46, 47, 48, 51, 60, 61, 62], "feature_dir": [9, 15, 25, 43], "fbank": [9, 11, 15, 25, 26, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 54], "max_dur": [9, 15, 25, 43], "bucketing_sampl": [9, 15, 25, 43], "num_bucket": [9, 15, 25, 43], "concatenate_cut": [9, 15, 25, 43], "duration_factor": [9, 15, 25, 43], "gap": [9, 15, 25, 43], "on_the_fly_feat": [9, 15, 25, 43], "shuffl": [9, 15, 25, 43], "return_cut": [9, 15, 25, 43], "num_work": [9, 15, 25, 43], "env_info": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43], "sha1": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43], "4c05309499a08454997adf500b56dcc629e35ae5": [9, 25], "date": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43], "tue": [9, 25, 28, 43], "jul": [9, 15, 25], "25": [9, 15, 20, 25, 26, 28, 29, 38, 43, 46, 51, 52, 54, 61], "36": [9, 20, 25, 28, 40, 43, 44], "dev": [9, 10, 15, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "7640d663": 9, "branch": [9, 15, 25, 26, 28, 29, 30, 38, 40, 43, 48], "375520d": 9, "fri": [9, 26], "28": [9, 20, 25, 28, 29, 40, 43, 47, 64], "07": [9, 25, 28, 29, 30, 38, 40, 41, 43], "hostnam": [9, 15, 25, 26, 28, 29, 30, 40], "ip": [9, 15, 25, 26, 28, 29, 30, 40], "172": 9, "17": [9, 20, 25, 26, 28, 29, 30, 38, 43, 51, 52, 60], "401": 9, "lexicon": [9, 11, 15, 18, 25, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "168": [9, 15, 20, 25, 47], "compil": [9, 15, 25, 28, 29, 38, 40, 43], "linv": [9, 11, 15, 25, 40, 43, 54], "403": [9, 47], "273": [9, 15, 25, 26, 40], "devic": [9, 15, 20, 25, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 61, 62], "406": [9, 43], "291": [9, 25], "424": 9, "218": [9, 15, 25, 29], "about": [9, 11, 12, 14, 15, 16, 20, 25, 28, 29, 30, 35, 40, 44, 46, 49, 60, 61, 62], "cut": [9, 15, 25, 43], "425": [9, 29, 43], "252": [9, 25], "504": 9, "204": [9, 25, 30, 43], "batch": [9, 15, 17, 25, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 56, 60, 61, 62], "process": [9, 15, 17, 25, 26, 28, 29, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "until": [9, 15, 25, 43, 48], "w": [9, 20, 25, 43, 51, 52], "nnpack": 9, "cpp": [9, 28, 32], "53": [9, 15, 20, 25, 30, 38, 46, 47, 52, 60, 61], "could": [9, 28, 29, 30, 35, 36, 37, 38, 41, 56], "reason": [9, 14, 26, 28, 29, 30, 36, 61], "unsupport": 9, "hardwar": 9, "687": 9, "241": [9, 25, 38], "transcript": [9, 15, 18, 25, 38, 39, 40, 41, 43, 46, 47, 51, 52, 60, 61, 62], "store": [9, 11, 15, 25, 43, 56], "recog": [9, 15, 25, 40, 43], "test_set": [9, 15, 25, 54], "688": 9, "564": [9, 15, 25], "240": [9, 15, 25, 38, 54], "ins": [9, 15, 25, 43, 54], "del": [9, 15, 20, 25, 43, 54], "sub": [9, 15, 25, 43, 54], "690": 9, "249": [9, 25, 29], "wrote": [9, 15, 25, 43], "detail": [9, 11, 15, 20, 25, 27, 31, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 58, 60, 61, 62, 64, 65], "stat": [9, 15, 25, 43], "err": [9, 15, 25, 40, 43], "316": [9, 25, 43], "congratul": [9, 13, 20, 25, 28, 29, 30, 38, 41, 43, 47, 51, 52, 54, 64], "finish": [9, 14, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 61, 62], "successfulli": [9, 13, 25, 28, 29, 30, 64], "collect": [10, 13, 25, 56], "post": 10, "correspond": [10, 22, 23], "solut": 10, "One": 10, "torchaudio": [10, 13, 18, 20, 24, 58], "cu111": 10, "torchvis": 10, "f": [10, 13, 15, 20, 25, 51, 52, 64], "org": [10, 13, 20, 25, 39, 40, 46, 56, 60, 61, 62], "whl": [10, 13, 25], "torch_stabl": [10, 13, 25], "throw": [10, 28, 29, 30], "while": [10, 16, 25, 28, 29, 30, 35, 38, 40, 41, 43, 44, 46, 48, 49, 56, 60, 61, 62], "That": [10, 11, 14, 15, 16, 17, 28, 29, 44, 46, 60, 61, 62], "cu11": 10, "correct": 10, "traceback": 10, "most": [10, 61, 62], "recent": 10, "last": 10, "yesnoasrdatamodul": 10, "home": [10, 20, 28, 29, 38, 43], "xxx": [10, 20, 26, 28, 29, 30], "next": [10, 13, 14, 23, 25, 28, 29, 30, 43, 44, 46, 47, 48, 49, 56, 60, 61, 62], "gen": [10, 13, 14, 23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "kaldi": [10, 11, 13, 14, 17, 18, 23, 24, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "34": [10, 20, 28, 29], "datamodul": 10, "add_eo": 10, "add_so": 10, "get_text": 10, "39": [10, 20, 25, 28, 30, 40, 43, 47, 51], "tensorboard": [10, 16, 25, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "summarywrit": 10, "miniconda3": 10, "env": 10, "yyi": 10, "loosevers": 10, "uninstal": 10, "setuptool": [10, 13, 25], "yangyifan": 10, "anaconda3": 10, "dev20230112": 10, "linux": [10, 13, 14, 23, 25, 27, 28, 29, 30, 31], "x86_64": [10, 25, 28], "egg": 10, "handl": [10, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "except": [10, 26], "anoth": 10, "occur": 10, "pruned_transducer_stateless7_ctc_b": [10, 48], "104": [10, 15, 20, 25], "rais": 10, "anaconda": 10, "maco": [10, 13, 14, 23, 27, 28, 29, 30, 31], "probabl": [10, 40, 46, 48, 60, 61, 62], "variabl": [10, 12, 13, 16, 25, 28, 29, 30, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "dyld_library_path": 10, "conda_prefix": 10, "locat": [10, 16, 28], "libpython": 10, "abl": 10, "insid": [10, 33], "codna_prefix": 10, "ld_library_path": 10, "setup": [11, 14, 20, 24, 25, 28, 35, 36, 38, 40, 41, 43, 44, 46, 47, 51, 52, 54, 61, 62, 64, 65], "everyth": [11, 20, 27], "tmp": [11, 12, 13, 15, 16, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64], "each": [11, 15, 18, 26, 28, 29, 31, 35, 38, 40, 41, 43, 46, 48, 49, 56, 58, 60, 61, 62], "exist": 11, "anyth": [11, 21, 23], "els": [11, 20], "wonder": [11, 15], "url": [11, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "varieti": 11, "folder": [11, 25, 26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "wav": [11, 15, 20, 26, 28, 29, 30, 31, 33, 38, 40, 41, 43, 46, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "scp": 11, "feat": 11, "put": [11, 13, 25, 28, 29, 48, 61], "l": [11, 20, 25, 28, 29, 30, 40, 51, 52, 54], "waves_yesno": [11, 15, 25], "tar": [11, 25, 64], "gz": [11, 25, 56], "l41": 11, "extract": [11, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "yesno_cuts_test": 11, "jsonl": [11, 26], "yesno_cuts_train": 11, "yesno_feats_test": 11, "lca": 11, "yesno_feats_train": 11, "hlg": [11, 15, 25, 47, 51, 52, 54], "l_disambig": [11, 54], "lexicon_disambig": [11, 20, 54], "manifest": [11, 25, 35, 36, 44], "yesno_recordings_test": 11, "yesno_recordings_train": 11, "yesno_supervisions_test": 11, "yesno_supervisions_train": 11, "18": [11, 20, 25, 28, 29, 30, 38, 40, 41, 43, 46, 47, 51, 52, 60, 61, 62], "thei": [11, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "idea": [11, 15, 20, 58], "examin": 11, "relat": [11, 18, 26, 35, 36, 38, 40, 43, 47, 51, 52, 54, 64, 65], "gunzip": 11, "c": [11, 17, 20, 25, 40, 41, 46, 48, 49, 54, 60, 61, 62, 64], "head": [11, 20, 25, 35, 40, 58], "output": [11, 12, 13, 15, 20, 26, 28, 29, 30, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "id": [11, 38, 41, 43, 47, 51, 52], "0_0_0_0_1_1_1_1": 11, "channel": [11, 23, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "sampling_r": 11, "num_sampl": 11, "50800": 11, "35": [11, 20, 25, 26, 28, 29, 30, 40, 43, 60], "channel_id": 11, "0_0_0_1_0_1_1_0": 11, "48880": 11, "0_0_1_0_0_1_1_0": 11, "48160": 11, "audio": [11, 20, 25, 51, 52, 64], "l300": 11, "mean": [11, 14, 15, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "field": [11, 39], "per": [11, 40, 46, 61, 62], "recording_id": 11, "NO": [11, 15, 54], "ye": [11, 15, 17, 54], "hebrew": [11, 54], "supervis": [11, 24, 37, 66], "l510": 11, "furthermor": [11, 40], "featur": [11, 17, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "compress": [11, 25], "lilcom": [11, 25], "cutset": [11, 36], "recordingset": 11, "supervisionset": 11, "featureset": 11, "num_fram": [11, 20], "635": 11, "num_featur": 11, "frame_shift": 11, "storage_typ": 11, "lilcom_chunki": 11, "storage_path": 11, "storage_kei": 11, "13000": 11, "3570": 11, "record": [11, 23, 29, 30, 38, 39, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "monocut": 11, "611": 11, "16570": 11, "12964": 11, "2929": 11, "602": 11, "32463": 11, "12936": 11, "2696": 11, "actual": [11, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "separ": [11, 31, 56], "lang": [11, 20, 25, 26, 40, 43, 49], "quit": [12, 14, 16, 37, 38, 40, 41, 43, 46, 48, 49, 56, 60, 61, 62], "cuda_visible_devic": [12, 16, 25, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "usag": [12, 15, 16, 26, 28, 29, 30, 32, 33, 47, 51, 52, 54, 63], "one": [12, 23, 26, 28, 29, 30, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64], "tini": [13, 14], "well": [13, 26, 35, 54, 66], "hundr": 13, "thousand": 13, "virtualenv": [13, 25], "icefall_env": [13, 15], "interpret": 13, "usr": 13, "prefix": [13, 26], "pkg_resourc": 13, "wheel": [13, 25, 28], "remeb": 13, "continu": [13, 15, 20, 28, 29, 30, 31, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "caution": [13, 38, 43], "matter": [13, 25, 28], "torchaduio": 13, "from_wheel": [13, 15, 25], "dev20231220": 13, "china": [13, 25, 39], "\u4e2d\u56fd\u56fd\u5185\u7528\u6237": [13, 25], "\u5982\u679c\u8bbf\u95ee\u4e0d\u4e86": [13, 25], "\u8bf7\u4f7f\u7528": [13, 25], "cn": [13, 25], "anytim": 13, "modulenotfounderror": 13, "don": [13, 14, 15, 17, 20, 25, 28, 29, 30, 32, 36, 38, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "walk": [14, 20], "recognit": [14, 23, 24, 27, 28, 29, 37, 39, 40, 54, 66], "system": [14, 56], "out": [14, 44, 56], "minut": [14, 56], "sequenti": 14, "part": [14, 15, 23, 25, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62, 64, 65], "window": [14, 23, 27, 28, 29, 30, 31], "commun": 14, "appreci": 14, "virtual": 14, "curiou": 14, "quick": 14, "state_dict": [14, 24, 34, 38, 40, 41, 43, 47, 51, 52, 54], "jit": [14, 24, 27, 34, 43], "onnx": [14, 24, 26, 34, 35, 63, 65], "torchscript": [15, 27, 32, 33, 34], "trace": [15, 24, 27, 32, 34], "explain": 15, "kind": [15, 43, 46, 48, 49, 60, 61, 62], "produc": [15, 27, 46, 48, 49, 60, 61, 62], "03": [15, 25, 26, 29, 35, 40, 43, 51, 52, 60, 64], "912": [15, 26], "76": [15, 20, 25, 54], "lr": [15, 25, 35, 36, 40, 60], "weight_decai": [15, 25], "1e": [15, 25], "start_epoch": [15, 25], "best_train_loss": [15, 25, 26, 28, 29, 30], "inf": [15, 25, 26, 28, 29, 30], "best_valid_loss": [15, 25, 26, 28, 29, 30], "best_train_epoch": [15, 25, 26, 28, 29, 30], "best_valid_epoch": [15, 25, 26, 29, 30], "batch_idx_train": [15, 25, 26, 28, 29, 30], "log_interv": [15, 25, 26, 28, 29, 30], "reset_interv": [15, 25, 26, 28, 29, 30], "valid_interv": [15, 25, 26, 28, 29, 30], "beam_siz": [15, 25, 26, 40], "sum": [15, 20, 25], "913": 15, "950": 15, "971": [15, 52], "106": [15, 20, 25, 29, 43], "Not": 15, "974": 15, "111": [15, 20, 25, 43], "weights_onli": 15, "kei": [15, 28, 29, 30, 43], "bia": 15, "running_mean": 15, "running_var": 15, "num_batches_track": 15, "output_linear": 15, "48": [15, 20, 25, 28, 29, 35, 38, 40], "089": 15, "090": 15, "ad79f1c699c684de9785ed6ca5edb805a41f78c3": 15, "wed": [15, 25, 28, 38, 40, 43], "26": [15, 20, 25, 28, 29, 30, 40, 43, 52], "09": [15, 26, 29, 38, 40, 41, 43, 60], "aa073f6": 15, "none": [15, 20, 25, 38, 43], "9a47c08": 15, "mon": [15, 29, 30], "aug": [15, 44], "50": [15, 20, 25, 26, 28, 29, 30, 43, 46, 51, 60, 61, 62], "privat": 15, "fangjun": [15, 19, 25, 26, 28, 29, 30, 40, 43], "macbook": 15, "pro": [15, 38, 43], "127": [15, 20, 25, 28, 29, 54], "092": 15, "103": [15, 20], "272": 15, "109": [15, 20, 25, 38, 43], "112": [15, 20, 28, 29, 30], "115": [15, 20, 28, 29, 38, 43], "253": 15, "386": 15, "556": 15, "557": 15, "558": 15, "248": [15, 40], "559": 15, "315": [15, 28, 38, 40, 41, 43, 47], "ident": [15, 20], "kaldifeat": 15, "csukuangfj": [15, 25, 26, 28, 29, 31, 38, 40, 41, 43, 47, 51, 52, 54, 60, 64], "dev20231221": 15, "0_0_0_1_0_0_0_1": [15, 54], "0_0_1_0_0_0_1_0": [15, 54], "19": [15, 20, 26, 28, 29, 30, 35, 36, 38, 43, 47, 51, 52], "208": [15, 43], "136": [15, 20, 43], "num_class": [15, 38, 43, 54], "sample_r": [15, 20, 26, 38, 40, 43, 54], "words_fil": [15, 38, 43, 54], "sound_fil": [15, 26, 38, 40, 43, 54], "142": [15, 20, 28, 38, 41, 43], "144": [15, 20, 43], "212": 15, "213": [15, 54], "construct": [15, 20, 26, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 54], "170": [15, 47], "sound": [15, 26, 28, 29, 30, 33, 34, 38, 40, 41, 43, 47, 51, 52, 54], "224": 15, "176": [15, 28, 40, 43], "304": [15, 29], "214": [15, 40, 43], "47": [15, 20, 25, 28, 29, 30, 36, 38, 43], "44": [15, 20, 25, 28, 29, 35, 43, 51, 52], "666": 15, "667": 15, "670": 15, "677": [15, 28], "100": [15, 20, 25, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "843": 15, "cpu_jit": [15, 32, 38, 43, 46, 48, 49, 61, 62], "confus": [15, 32], "move": [15, 32, 46, 48, 49, 61, 62], "map_loc": 15, "resid": 15, "default": [15, 28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "jit_pretrain": [15, 33, 48, 49, 60], "nn": [15, 40, 46, 48, 49, 60, 61, 62], "56": [15, 20, 25, 28, 29, 43, 51], "00": [15, 25, 28, 38, 40, 41, 43, 47, 51, 52, 54], "603": 15, "121": [15, 20, 47], "nn_model": [15, 38, 43], "129": [15, 20, 41], "640": [15, 25, 30], "134": [15, 20, 38], "641": 15, "138": [15, 20, 38, 40], "148": [15, 20, 35], "642": 15, "154": [15, 20, 41], "727": 15, "190": [15, 47], "192": [15, 30, 35, 43], "export_onnx": 15, "onnxruntim": [15, 31], "888": [15, 38], "83": [15, 20, 43, 47], "892": 15, "diagnost": 15, "verbos": 15, "warn": 15, "21": [15, 20, 25, 26, 28, 35, 38, 40, 43, 51, 52], "047": [15, 40], "meta_data": 15, "model_typ": 15, "model_author": 15, "comment": 15, "non": [15, 24, 43, 58, 61, 66], "vocab_s": [15, 26, 28, 29, 30, 40], "049": 15, "140": [15, 20, 25, 41], "int8": [15, 27, 34, 65], "quantiz": [15, 27, 34, 44], "075": 15, "onnx_quant": 15, "538": [15, 43], "tensor": [15, 25, 29, 30, 38, 40, 41, 43, 46, 54, 60, 61], "transpose_1_output_0": 15, "081": 15, "151": [15, 20, 28], "float32": [15, 28, 29, 30], "onnx_pretrain": [15, 31], "260": [15, 30, 43], "166": [15, 20], "171": [15, 25, 41, 43, 51, 52], "173": 15, "267": [15, 29, 40, 51, 52], "270": 15, "180": [15, 29, 38, 43], "279": [15, 43], "196": 15, "318": [15, 28, 29], "232": 15, "234": [15, 43], "deploi": [15, 31, 38, 43], "sherpa": [15, 23, 27, 32, 33, 34, 60, 63], "framework": [15, 23, 46, 61], "_": [15, 20, 44], "ncnn": [15, 24, 34], "forc": [17, 24], "align": [17, 24, 63], "instead": [17, 30, 40, 61], "support": [17, 20, 25, 27, 28, 29, 30, 38, 40, 43, 46, 48, 49, 58, 60, 61, 62, 64, 65], "api": [17, 18, 20], "ctc": [18, 20, 39, 42, 45, 49, 50, 53], "loss": [18, 20, 25, 28, 29, 38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "log_prob": [18, 46, 61, 62], "token2id": 18, "id2token": 18, "word2id": 18, "id2word": 18, "convert": [18, 25, 28, 29, 30, 43], "graph": [18, 38, 41, 43, 46, 47, 51, 52, 61, 62], "segment": [18, 25], "summari": 18, "todo": 19, "through": [20, 39], "sure": [20, 28, 29, 30], "NOT": [20, 38, 40, 43, 54], "wave": [20, 26, 28, 29, 30, 38, 43], "speech_fil": 20, "download_asset": 20, "asset": 20, "lab41": 20, "sri": 20, "voic": 20, "src": [20, 28, 30], "sp0307": 20, "ch127535": 20, "sg0042": 20, "waveform": 20, "sr": 20, "had": [20, 26, 43, 47], "curios": 20, "besid": 20, "me": 20, "moment": [20, 23], "split": [20, 44], "shape": [20, 25, 30], "assert": [20, 30], "ndim": 20, "16000": [20, 26, 38, 40, 41, 43, 47, 48, 51, 52], "cach": [20, 25, 30], "filenam": [20, 25, 28, 29, 30, 31, 32, 33, 48, 49, 60, 62, 64, 65], "content": [20, 28, 29, 30], "element": [20, 30], "bundl": [20, 25], "pipelin": 20, "mms_fa": 20, "is_avail": 20, "get_model": 20, "with_star": 20, "inference_mod": 20, "emiss": 20, "size": [20, 25, 26, 28, 29, 30, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "169": [20, 43, 51, 52], "get_dict": 20, "star": [20, 25, 28, 29, 30], "ep": 20, "dict": [20, 26, 30], "enumer": [20, 25], "prepare_lang": 20, "add_disambig_symbol": 20, "max_disambig_id": 20, "encod": [20, 27, 31, 33, 34, 35, 38, 40, 41, 43, 46, 47, 48, 54, 58, 60, 61, 62], "utf": 20, "join": 20, "k": [20, 30, 46, 51, 52, 60, 61, 62], "rang": 20, "o": [20, 25], "b": [20, 40, 43, 51, 52], "d": [20, 51, 52, 56], "h": [20, 25], "15": [20, 25, 26, 28, 29, 30, 35, 36, 40, 41, 43, 51, 54, 56], "v": [20, 28, 29, 30, 43, 51, 52], "j": [20, 28, 29, 38, 43], "z": [20, 51, 52], "q": 20, "27": [20, 25, 28, 29, 30, 35, 36, 38, 40, 47, 52], "charact": [20, 40], "unit": [20, 40], "prepare_lang_fst": 20, "hl": 20, "rw": [20, 28, 29, 30], "13k": 20, "jun": 20, "7k": 20, "kaldi_decod": 20, "decodablectc": 20, "fasterdecod": 20, "fasterdecoderopt": 20, "kaldifst": 20, "def": 20, "force_align": 20, "stdvectorfst": 20, "contigu": 20, "numpi": [20, 25], "decoder_opt": 20, "max_act": 20, "3000": [20, 26, 28, 29, 30], "reached_fin": 20, "return": 20, "ok": 20, "best_path": 20, "get_best_path": 20, "isymbols_out": 20, "osymbols_out": 20, "total_weight": 20, "get_linear_symbol_sequ": 20, "linear": [20, 28, 29, 40], "sequenc": [20, 61, 62], "increment": [20, 28, 29, 30], "main": [20, 25, 38, 43, 58], "ctc_forced_alignment_api_tutori": 20, "frame": [20, 25, 35, 40, 46, 48, 61, 62], "eas": [20, 28, 29, 30], "31": [20, 28, 29, 30, 35, 43], "33": [20, 25, 28, 29, 38, 39, 40, 43, 51], "37": [20, 29, 38, 40, 43, 51], "40": [20, 25, 28, 29, 30, 41, 43, 47, 51, 52], "41": [20, 25, 28, 30, 38, 40, 51, 54], "46": [20, 25, 29, 38, 43], "49": [20, 25, 28, 29, 43, 52, 54], "51": [20, 25, 28, 38, 43, 54], "54": [20, 25, 29, 30, 43, 47, 51, 52], "55": [20, 25, 28, 41, 43, 51], "60": 20, "64": [20, 25, 26, 28, 35, 40, 61], "66": [20, 25, 29, 36], "68": [20, 25, 43], "69": [20, 30], "70": [20, 25], "71": [20, 25, 43, 47], "72": [20, 40, 43], "75": [20, 25, 28], "78": 20, "79": [20, 25], "80": [20, 26, 28, 29, 30, 38, 40, 43], "81": 20, "82": 20, "84": [20, 29, 38], "85": 20, "87": [20, 25, 28], "88": [20, 28, 40], "89": [20, 25, 38], "90": [20, 25, 28], "91": [20, 25], "92": [20, 25, 43], "94": 20, "97": [20, 25, 28, 38], "98": [20, 38], "101": [20, 29], "102": [20, 30, 38], "105": [20, 43], "107": [20, 29, 47], "108": 20, "110": [20, 43], "113": [20, 40, 43], "114": 20, "116": 20, "117": [20, 43], "118": [20, 25, 43], "119": [20, 54], "120": 20, "122": [20, 43], "123": 20, "124": [20, 25, 38, 43], "125": [20, 43, 54], "126": [20, 43], "128": [20, 35, 43], "130": 20, "131": [20, 38, 43], "133": [20, 30], "135": [20, 43, 54], "137": 20, "139": [20, 54], "141": [20, 28], "143": [20, 54], "145": 20, "146": [20, 25], "147": [20, 29, 30], "149": [20, 25, 28, 43], "150": [20, 38, 43], "152": 20, "153": [20, 43, 54], "155": 20, "157": [20, 25], "158": [20, 29], "159": [20, 29, 43, 54], "161": [20, 41, 43], "162": [20, 43], "163": [20, 40, 43], "164": 20, "165": [20, 38, 43], "167": [20, 25], "merg": 20, "merge_token": 20, "token_span": 20, "span": 20, "end": [20, 40, 46, 48, 49, 54, 60, 61, 62, 64, 65], "unflatten": 20, "list_": 20, "length": [20, 28, 30, 40, 56, 61, 62], "ret": 20, "append": 20, "word_span": 20, "tokenspan": 20, "preview_word": 20, "x0": 20, "int": [20, 38, 43], "x1": 20, "3f": 20, "sec": 20, "ipython": 20, "displai": [20, 38, 40, 41, 43], "along": 20, "stamp": [20, 40], "644": 20, "664": 20, "704": [20, 25, 38, 51], "845": 20, "885": 20, "026": [20, 30], "086": 20, "790": 20, "871": 20, "314": [20, 25], "334": 20, "414": 20, "495": [20, 25], "575": 20, "595": [20, 29], "756": 20, "837": 20, "repost": 20, "whole": [20, 35, 36, 43, 47, 51, 52, 61, 62], "youtub": [21, 24, 43, 44, 46, 47, 48, 49, 60, 61, 62], "video": [21, 24, 43, 44, 46, 47, 48, 49, 60, 61, 62], "upload": [22, 23, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "specif": [22, 31, 40], "aishel": [22, 24, 38, 40, 41, 42, 66], "wenetspeech": [22, 32], "ipad": 23, "phone": 23, "screenshot": [23, 38, 40, 41, 43, 44, 46, 54, 60, 61], "chines": [23, 39, 40], "english": [23, 36, 54, 60], "greedi": 23, "click": [23, 25, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "button": 23, "submit": 23, "wait": 23, "bottom": [23, 46, 48, 49, 60, 61, 62], "subscrib": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "nadira": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "povei": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "www": [23, 25, 39, 43, 44, 46, 47, 48, 49, 56, 60, 61, 62], "uc_vaumpkminz1pnkfxan9mw": [23, 25, 43, 44, 46, 47, 48, 49, 60, 61, 62], "dummi": [24, 43], "toolkit": 24, "cudnn": 24, "docker": [24, 25], "frequent": 24, "ask": [24, 64], "question": 24, "faq": 24, "oserror": 24, "libtorch_hip": 24, "attributeerror": 24, "distutil": 24, "attribut": [24, 30, 43], "libpython3": 24, "timit": [24, 42, 51, 52, 66], "tt": [24, 64, 65, 66], "vit": [24, 63, 66], "ljspeech": [24, 63, 66], "vctk": [24, 63, 66], "fine": [24, 44, 66], "finetun": [24, 37, 66], "zipform": [24, 27, 31, 34, 37, 42, 45, 56, 57, 59, 66], "adapt": [24, 37, 66], "contribut": 24, "guid": 25, "suggest": [25, 36, 46, 48, 49, 60, 61, 62], "strongli": 25, "point": [25, 26, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "sever": [25, 26, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 58, 60, 61, 62], "just": [25, 28, 29, 30, 56, 58], "kuangfangjun": [25, 28, 29, 30], "cpython3": 25, "final": [25, 26, 28, 29, 43, 47], "9422m": 25, "creator": 25, "cpython3posix": 25, "dest": 25, "fj": [25, 26, 28, 29, 30, 40, 43], "clear": 25, "no_vcs_ignor": 25, "global": 25, "seeder": 25, "fromappdata": 25, "app_data_dir": 25, "ad": [25, 28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 58, 60, 61, 62], "seed": 25, "bashactiv": 25, "cshellactiv": 25, "fishactiv": 25, "nushellactiv": 25, "powershellactiv": 25, "pythonactiv": 25, "determin": 25, "nvidia": [25, 38, 40, 41, 43], "smi": 25, "510": 25, "driver": 25, "greater": 25, "our": [25, 28, 29, 30, 32, 33, 43, 44, 46, 58, 61, 62], "case": [25, 26, 28, 29, 30, 37, 46, 48, 49, 60, 61, 62], "verifi": 25, "nvcc": 25, "copyright": 25, "2005": 25, "2019": 25, "corpor": 25, "wed_oct_23_19": 25, "38_pdt_2019": 25, "v10": 25, "cu116": 25, "compat": 25, "stabl": 25, "matrix": 25, "2bcu116": 25, "cp38": 25, "linux_x86_64": 25, "1983": 25, "mb": [25, 28, 29, 30], "________________________________________": 25, "gb": [25, 40], "764": 25, "kb": [25, 28, 29, 30, 51, 52], "eta": 25, "satisfi": 25, "extens": 25, "__version__": 25, "dev20230725": 25, "pypi": 25, "tuna": 25, "tsinghua": 25, "edu": 25, "resolv": 25, "ubuntu": [25, 28, 29, 30], "2bcuda11": 25, "manylinux_2_17_x86_64": 25, "manylinux2014_x86_64": 25, "graphviz": 25, "de": [25, 26, 28, 29, 30, 40], "5e": 25, "fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa": 25, "cento": 25, "2009": 25, "core": 25, "cmake": [25, 28, 29, 38, 43], "gcc": 25, "cmake_cuda_flag": 25, "wno": 25, "deprec": [25, 40], "lineinfo": 25, "expt": 25, "extend": 25, "lambda": 25, "use_fast_math": 25, "xptxa": 25, "gencod": 25, "arch": 25, "compute_35": 25, "sm_35": 25, "compute_50": 25, "sm_50": 25, "compute_60": 25, "sm_60": 25, "compute_61": 25, "sm_61": 25, "compute_70": 25, "sm_70": 25, "compute_75": 25, "sm_75": 25, "compute_80": 25, "sm_80": 25, "compute_86": 25, "sm_86": 25, "donnx_namespac": 25, "onnx_c2": 25, "compute_52": 25, "sm_52": 25, "xcudaf": 25, "diag_suppress": 25, "cc_clobber_ignor": 25, "integer_sign_chang": 25, "useless_using_declar": 25, "set_but_not_us": 25, "field_without_dll_interfac": 25, "base_class_has_different_dll_interfac": 25, "dll_interface_conflict_none_assum": 25, "dll_interface_conflict_dllexport_assum": 25, "implicit_return_from_non_void_funct": 25, "unsigned_compare_with_zero": 25, "declared_but_not_referenc": 25, "bad_friend_decl": 25, "relax": 25, "constexpr": 25, "d_glibcxx_use_cxx11_abi": 25, "option": [25, 27, 31, 34, 40, 44, 47, 51, 52, 54], "wall": 25, "strict": [25, 30, 39], "overflow": 25, "unknown": 25, "pragma": 25, "cmake_cxx_flag": 25, "unus": 25, "nvtx": 25, "disabl": [25, 26, 28, 29], "debug": 25, "sync": 25, "kernel": [25, 28, 30, 35, 40], "memori": [25, 28, 35, 38, 40, 43, 58], "alloc": 25, "214748364800": 25, "byte": [25, 28, 29, 30], "200": [25, 26, 28, 29, 30, 38, 43, 44, 51, 52, 54], "abort": 25, "__file__": 25, "cpython": [25, 28], "gnu": [25, 28], "req": 25, "vq12fd5i": 25, "filter": 25, "quiet": [25, 39], "7640d663469b22cd0b36f3246ee9b849cd25e3b7": 25, "metadata": [25, 51, 52], "pyproject": 25, "toml": 25, "cytoolz": 25, "3b": 25, "a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f": 25, "pyyaml": 25, "c8": 25, "6b": 25, "6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b": 25, "ma": 25, "nylinux_2_17_x86_64": 25, "736": 25, "dataclass": 25, "2f": 25, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 25, "dev0": 25, "7640d66": 25, "a8": 25, "df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4": 25, "linux_2_17_x86_64": 25, "tqdm": 25, "e6": 25, "a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97": 25, "audioread": 25, "5d": 25, "cb": 25, "82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923": 25, "377": 25, "tabul": 25, "4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854": 25, "1a": 25, "e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f": 25, "ab": [25, 46, 60, 61, 62], "c3": 25, "57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121": 25, "intervaltre": 25, "fb": 25, "396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb": 25, "soundfil": 25, "bd": 25, "0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c": 25, "py2": 25, "toolz": 25, "7f": 25, "5c": 25, "922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9": 25, "sortedcontain": 25, "9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621c": 25, "cffi": 25, "b7": 25, "8b": 25, "06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79": 25, "442": 25, "pycpars": 25, "d5": 25, "5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53": 25, "687627": 25, "sha256": 25, "cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a": 25, "ephem": 25, "wwtk90_m": 25, "7a": 25, "8e": 25, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 25, "23704": 25, "5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2": 25, "9c": 25, "f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39": 25, "26098": 25, "2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9": 25, "f3": 25, "ed": 25, "2b": 25, "c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271": 25, "remot": 25, "12942": 25, "count": 25, "total": [25, 29, 30, 35, 38, 40, 41, 43, 44, 46, 47, 54, 60, 61], "delta": 25, "reus": 25, "pack": [25, 56, 61, 62], "12875": 25, "receiv": 25, "mib": 25, "8835": 25, "dl_dir": [25, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "___________________________________________________": 25, "70m": 25, "1mb": 25, "718": 25, "compute_fbank_yesno": 25, "_______________________________________________________________________________": 25, "82it": 25, "778": 25, "______________________________________________________________________________": 25, "256": [25, 30, 35, 51, 52], "92it": 25, "project": 25, "kaldilm": 25, "csrc": [25, 43], "arpa_file_pars": 25, "cc": 25, "void": 25, "arpafilepars": 25, "std": 25, "istream": 25, "275": [25, 38], "compile_hlg": 25, "276": 25, "309": 25, "ctc_topo": 25, "max_token_id": 25, "310": 25, "intersect": [25, 46, 61, 62], "323": 25, "lg": [25, 46, 49, 61, 62], "connect": [25, 26, 35, 43, 46, 47, 60, 61, 62], "class": [25, 43], "341": 25, "rag": 25, "raggedtensor": 25, "remov": [25, 38, 40, 41, 43, 47, 51, 52], "disambigu": 25, "354": 25, "remove_epsilon": 25, "445": 25, "arc": 25, "compos": 25, "446": 25, "447": 25, "fault": 25, "dump": 25, "protocol_buffers_python_implement": 25, "674": 25, "interest": [25, 44, 46, 48, 49, 60, 61, 62], "936": 25, "481": 25, "482": 25, "world_siz": [25, 44], "master_port": 25, "12354": 25, "num_epoch": 25, "3fb0a43": 25, "thu": [25, 26, 28, 29, 30, 40, 43, 47], "05": [25, 26, 28, 29, 35, 36, 38, 40, 41, 43, 52, 56, 65], "74279": [25, 26, 28, 29, 30, 40], "1220091118": 25, "57c4d55446": 25, "sph26": 25, "941": 25, "949": 25, "965": [25, 38], "244": 25, "967": 25, "199": [25, 43, 47], "singlecutsampl": 25, "205": [25, 43], "968": 25, "565": [25, 43], "422": 25, "065": 25, "over": [25, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "2436": 25, "tot_loss": 25, "681": [25, 28], "4561": 25, "2828": 25, "7076": 25, "22192": 25, "444": 25, "9002": 25, "18067": 25, "011": 25, "2555": 25, "2695": 25, "484": 25, "34971": 25, "331": [25, 28, 29, 43, 47], "4688": 25, "368": 25, "633": 25, "2532": 25, "242": [25, 38, 43], "1139": 25, "1592": 25, "522": [25, 43], "1627": 25, "209": [25, 47], "07055": 25, "1175": 25, "07091": 25, "847": 25, "07731": 25, "427": [25, 29, 43], "04391": 25, "05341": 25, "884": 25, "04384": 25, "387": [25, 52], "03458": 25, "04616": 25, "707": [25, 38, 43], "03379": 25, "758": [25, 43], "433": [25, 43], "01054": 25, "980": [25, 43], "009014": 25, "009974": 25, "489": [25, 38], "01085": 25, "258": [25, 51, 52], "01172": 25, "01055": 25, "621": [25, 54], "01074": 25, "699": 25, "866": 25, "01044": 25, "844": 25, "008942": 25, "221": [25, 43], "01082": 25, "970": [25, 43], "01169": 25, "247": 25, "01073": 25, "326": [25, 29], "555": 25, "840": 25, "841": 25, "855": 25, "868": 25, "882": 25, "883": 25, "701": 25, "702": [25, 43], "fun": [25, 28, 29], "variou": [25, 31, 34, 66], "period": [26, 28], "disk": 26, "optim": [26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "resum": [26, 35, 36, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "strip": 26, "reduc": [26, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "pruned_transducer_stateless3": [26, 32, 58], "almost": [26, 46, 58, 61, 62], "stateless3": [26, 28], "repo": [26, 31], "those": 26, "iter": [26, 28, 29, 30, 33, 46, 48, 49, 60, 61, 62], "1224000": 26, "greedy_search": [26, 35, 36, 40, 46, 48, 60, 61, 62], "test_wav": [26, 28, 29, 30, 31, 38, 40, 41, 43, 47, 51, 52, 54], "1089": [26, 28, 29, 30, 31, 43, 47], "134686": [26, 28, 29, 30, 31, 43, 47], "0001": [26, 28, 29, 30, 31, 43, 47], "1221": [26, 28, 29, 43, 47], "135766": [26, 28, 29, 43, 47], "0002": [26, 28, 29, 43, 47], "multipl": [26, 38, 40, 41, 43, 47, 51, 52, 54], "Its": [26, 28, 29, 30, 43], "233": [26, 28, 29], "265": 26, "subsampling_factor": [26, 29, 30, 38, 40, 43], "encoder_dim": [26, 28, 29, 30], "512": [26, 28, 29, 30, 35, 38, 40, 43], "nhead": [26, 28, 30, 38, 40, 43, 46, 61], "dim_feedforward": [26, 28, 29, 40], "num_encoder_lay": [26, 28, 29, 30, 40], "decoder_dim": [26, 28, 29, 30], "joiner_dim": [26, 28, 29, 30], "model_warm_step": [26, 28, 29], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 26, "oct": [26, 43], "miss": [26, 28, 29, 30, 40, 43], "cu102": [26, 28, 29, 30], "1013": 26, "c39cba5": 26, "dirti": [26, 28, 29, 38, 43], "ceph": [26, 38, 40, 43], "0324160024": 26, "65bfd8b584": 26, "jjlbn": 26, "bpe_model": [26, 28, 29, 30, 43], "max_context": 26, "max_stat": 26, "context_s": [26, 28, 29, 30, 40], "max_sym_per_fram": [26, 40], "simulate_stream": 26, "decode_chunk_s": 26, "left_context": 26, "dynamic_chunk_train": 26, "causal_convolut": 26, "short_chunk_s": [26, 30, 61, 62], "num_left_chunk": [26, 30], "blank_id": [26, 28, 29, 30, 40], "unk_id": 26, "271": [26, 29], "612": 26, "458": 26, "giga": [26, 29, 60], "623": 26, "277": 26, "78648040": 26, "951": [26, 43], "285": [26, 40, 43], "952": 26, "295": [26, 38, 40, 41, 43], "957": 26, "301": [26, 43], "700": 26, "329": [26, 29, 43], "388": 26, "earli": [26, 28, 29, 30, 43, 47], "nightfal": [26, 28, 29, 30, 43, 47], "THE": [26, 28, 29, 30, 43, 47], "yellow": [26, 28, 29, 30, 43, 47], "lamp": [26, 28, 29, 30, 43, 47], "light": [26, 28, 29, 30, 43, 47], "AND": [26, 28, 29, 30, 43, 47], "THERE": [26, 28, 29, 30, 43, 47], "squalid": [26, 28, 29, 30, 43, 47], "quarter": [26, 28, 29, 30, 43, 47], "OF": [26, 28, 29, 30, 43, 47], "brothel": [26, 28, 29, 30, 43, 47], "god": [26, 43, 47], "AS": [26, 43, 47], "direct": [26, 43, 47], "consequ": [26, 43, 47], "sin": [26, 43, 47], "man": [26, 43, 47], "punish": [26, 43, 47], "her": [26, 43, 47], "love": [26, 43, 47], "child": [26, 43, 47], "whose": [26, 40, 43, 47], "ON": [26, 28, 43, 47], "THAT": [26, 43, 47], "dishonor": [26, 43, 47], "bosom": [26, 43, 47], "TO": [26, 43, 47], "parent": [26, 43, 47], "forev": [26, 43, 47], "WITH": [26, 43, 47], "race": [26, 43, 47], "descent": [26, 43, 47], "mortal": [26, 43, 47], "BE": [26, 43, 47], "bless": [26, 43, 47], "soul": [26, 43, 47], "IN": [26, 43, 47], "heaven": [26, 43, 47], "yet": [26, 28, 29, 43, 47], "THESE": [26, 43, 47], "thought": [26, 43, 47], "affect": [26, 43, 47], "hester": [26, 43, 47], "prynn": [26, 43, 47], "hope": [26, 39, 43, 47], "apprehens": [26, 43, 47], "390": 26, "down": [26, 38, 43, 46, 48, 49, 60, 61, 62], "reproduc": [26, 43], "9999": [26, 48, 49, 60], "symlink": 26, "pass": [26, 30, 38, 40, 41, 43, 46, 48, 49, 58, 60, 61, 62], "convemform": [27, 34, 58], "platform": [27, 31], "android": [27, 28, 29, 30, 31, 64], "raspberri": [27, 31], "pi": [27, 31], "\u7231\u82af\u6d3e": 27, "maix": 27, "iii": 27, "axera": 27, "rv1126": 27, "static": 27, "binari": [27, 28, 29, 30, 38, 40, 41, 43, 46, 54, 60, 61, 64], "pnnx": [27, 34], "conv": [28, 29], "emform": [28, 29, 32], "stateless2": [28, 29, 60], "pretrained_model": [28, 29, 30], "online_transduc": 28, "jit_xxx": [28, 29, 30], "anywher": [28, 29], "submodul": 28, "recurs": 28, "init": 28, "dcmake_build_typ": [28, 38, 43], "dncnn_python": 28, "dncnn_build_benchmark": 28, "dncnn_build_exampl": 28, "dncnn_build_tool": 28, "j4": 28, "pwd": 28, "compon": [28, 58], "ncnn2int8": [28, 29], "am": 28, "sai": [28, 29, 30, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62], "later": [28, 29, 30, 38, 41, 43, 46, 47, 48, 49, 51, 52, 60, 61, 62], "termin": 28, "tencent": [28, 29], "modif": [28, 40], "offici": 28, "synchron": 28, "renam": [28, 29, 30], "conv_emformer_transducer_stateless2": [28, 58], "cnn": [28, 30, 35], "context": [28, 35, 40, 46, 58, 60, 61, 62], "configur": [28, 30, 40, 44, 47, 51, 52, 54, 64, 65], "accordingli": [28, 29, 30], "yourself": [28, 29, 30, 44, 61, 62], "220": [28, 40, 41, 43], "229": [28, 38], "best_v": 28, "alid_epoch": 28, "subsampl": [28, 61, 62], "ing_factor": 28, "a34171ed85605b0926eebbd0463d059431f4f74a": 28, "dec": 28, "ver": 28, "ion": 28, "530e8a1": 28, "op": 28, "1220120619": [28, 29, 30], "7695ff496b": [28, 29, 30], "s9n4w": [28, 29, 30], "icefa": 28, "ll": 28, "transdu": 28, "cer": 28, "use_averaged_model": [28, 29, 30], "cnn_module_kernel": [28, 30], "left_context_length": 28, "chunk_length": 28, "right_context_length": 28, "memory_s": 28, "231": [28, 29, 30], "053": 28, "022": 28, "708": [28, 38, 40, 43, 54], "75490012": 28, "320": [28, 40], "682": 28, "lh": [28, 29, 30], "289m": 28, "jan": [28, 29, 30], "289": 28, "roughli": [28, 29, 30], "equal": [28, 29, 30, 61, 62], "1024": [28, 29, 30, 35, 60], "287": [28, 54], "1010k": [28, 29], "decoder_jit_trac": [28, 29, 30, 33, 60, 62], "283m": 28, "encoder_jit_trac": [28, 29, 30, 33, 60, 62], "0m": [28, 29], "joiner_jit_trac": [28, 29, 30, 33, 60, 62], "found": [28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "param": [28, 29, 30], "503k": [28, 29], "437": [28, 29, 30], "142m": 28, "79k": 28, "5m": [28, 29], "architectur": [28, 29, 30, 60], "editor": [28, 29, 30], "283": [28, 30], "1010": [28, 29], "503": [28, 29], "convers": [28, 29, 30], "half": [28, 29, 30, 46, 61, 62], "float16": [28, 29, 30], "occupi": [28, 29, 30], "twice": [28, 29, 30], "smaller": [28, 29, 30, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "fp16": [28, 29, 30, 35, 36, 46, 48, 49, 56, 60, 61, 62, 64, 65], "won": [28, 29, 30, 31, 38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "accept": [28, 29, 30], "216": [28, 38, 43, 51, 52], "encoder_param_filenam": [28, 29, 30], "encoder_bin_filenam": [28, 29, 30], "decoder_param_filenam": [28, 29, 30], "decoder_bin_filenam": [28, 29, 30], "joiner_param_filenam": [28, 29, 30], "joiner_bin_filenam": [28, 29, 30], "sound_filenam": [28, 29, 30], "328": 28, "336": 28, "106000": [28, 29, 30, 43, 47], "581": [28, 47], "381": 28, "7767517": [28, 29, 30], "1060": 28, "1342": 28, "in0": [28, 29, 30], "explan": [28, 29, 30], "magic": [28, 29, 30], "intermedi": [28, 29, 30], "1061": 28, "sherpametadata": [28, 29, 30], "sherpa_meta_data1": [28, 29, 30], "newli": [28, 29, 30], "must": [28, 29, 30, 61], "pair": [28, 29, 30], "sad": [28, 29, 30], "rememb": [28, 29, 30], "anymor": [28, 29, 30], "flexibl": [28, 29, 30, 35], "edit": [28, 29, 30], "arm": [28, 29, 30], "aarch64": [28, 29, 30], "onc": [28, 29], "mayb": [28, 29], "year": [28, 29], "_jit_trac": [28, 29], "fp32": [28, 29], "doubl": [28, 29], "py38": [28, 29, 30], "arg": [28, 29], "wave_filenam": [28, 29], "16k": [28, 29], "hz": [28, 29, 51, 52], "mono": [28, 29], "calibr": [28, 29], "cat": [28, 29], "eof": [28, 29], "calcul": [28, 29, 48, 61, 62], "has_gpu": [28, 29], "config": [28, 29], "use_vulkan_comput": [28, 29], "conv_87": 28, "942385": [28, 29], "threshold": [28, 29, 48], "938493": 28, "968131": 28, "conv_88": 28, "442448": 28, "549335": 28, "167552": 28, "conv_89": 28, "228289": 28, "001738": 28, "871552": 28, "linear_90": 28, "976146": 28, "101789": 28, "267128": 28, "linear_91": 28, "962030": 28, "162033": 28, "602713": 28, "linear_92": 28, "323041": 28, "853959": 28, "953129": 28, "linear_94": 28, "905416": 28, "648006": 28, "323545": 28, "linear_93": 28, "474093": 28, "200188": 28, "linear_95": 28, "888012": 28, "403563": 28, "483986": 28, "linear_96": 28, "856741": 28, "398679": 28, "524273": 28, "linear_97": 28, "635942": 28, "613655": 28, "590950": 28, "linear_98": 28, "460340": 28, "670146": 28, "398010": 28, "linear_99": 28, "532276": 28, "585537": 28, "119396": 28, "linear_101": 28, "585871": 28, "719224": 28, "205809": 28, "linear_100": 28, "751382": 28, "081648": 28, "linear_102": 28, "593344": 28, "450581": 28, "551147": 28, "linear_103": 28, "592681": 28, "705824": 28, "257959": 28, "linear_104": 28, "752957": 28, "980955": 28, "110489": 28, "linear_105": 28, "696240": 28, "877193": 28, "608953": 28, "linear_106": 28, "059659": 28, "643138": 28, "048950": 28, "linear_108": 28, "975461": 28, "589567": 28, "671457": 28, "linear_107": 28, "190381": 28, "515701": 28, "linear_109": 28, "710759": 28, "305635": 28, "082436": 28, "linear_110": 28, "531228": 28, "731162": 28, "159557": 28, "linear_111": 28, "528083": 28, "259322": 28, "211544": 28, "linear_112": 28, "148807": 28, "500842": 28, "087374": 28, "linear_113": 28, "592566": 28, "948851": 28, "166611": 28, "linear_115": 28, "437109": 28, "608947": 28, "642395": 28, "linear_114": 28, "193942": 28, "503904": 28, "linear_116": 28, "966980": 28, "200896": 28, "676392": 28, "linear_117": 28, "451303": 28, "061664": 28, "951344": 28, "linear_118": 28, "077262": 28, "965800": 28, "023804": 28, "linear_119": 28, "671615": 28, "847613": 28, "198460": 28, "linear_120": 28, "625638": 28, "131427": 28, "556595": 28, "linear_122": 28, "274080": 28, "888716": 28, "978189": 28, "linear_121": 28, "420480": 28, "429659": 28, "linear_123": 28, "826197": 28, "599617": 28, "281532": 28, "linear_124": 28, "396383": 28, "325849": 28, "335875": 28, "linear_125": 28, "337198": 28, "941410": 28, "221970": 28, "linear_126": 28, "699965": 28, "842878": 28, "224073": 28, "linear_127": 28, "775370": 28, "884215": 28, "696438": 28, "linear_129": 28, "872276": 28, "837319": 28, "254213": 28, "linear_128": 28, "180057": 28, "687883": 28, "linear_130": 28, "150427": 28, "454298": 28, "765789": 28, "linear_131": 28, "112692": 28, "924847": 28, "025545": 28, "linear_132": 28, "852893": 28, "116593": 28, "749626": 28, "linear_133": 28, "517084": 28, "024665": 28, "275314": 28, "linear_134": 28, "683807": 28, "878618": 28, "743618": 28, "linear_136": 28, "421055": 28, "322729": 28, "086264": 28, "linear_135": 28, "309880": 28, "917679": 28, "linear_137": 28, "827781": 28, "744595": 28, "915554": 28, "linear_138": 28, "422395": 28, "742882": 28, "402161": 28, "linear_139": 28, "527538": 28, "866123": 28, "849449": 28, "linear_140": 28, "128619": 28, "657793": 28, "266134": 28, "linear_141": 28, "839593": 28, "845993": 28, "021378": 28, "linear_143": 28, "442304": 28, "099039": 28, "889746": 28, "linear_142": 28, "325038": 28, "849592": 28, "linear_144": 28, "929444": 28, "618206": 28, "605080": 28, "linear_145": 28, "382126": 28, "321095": 28, "625010": 28, "linear_146": 28, "894987": 28, "867645": 28, "836517": 28, "linear_147": 28, "915313": 28, "906028": 28, "886522": 28, "linear_148": 28, "614287": 28, "908151": 28, "496181": 28, "linear_150": 28, "724932": 28, "485588": 28, "312899": 28, "linear_149": 28, "161146": 28, "606939": 28, "linear_151": 28, "164453": 28, "847355": 28, "719223": 28, "linear_152": 28, "086471": 28, "984121": 28, "222834": 28, "linear_153": 28, "099524": 28, "991601": 28, "816805": 28, "linear_154": 28, "054585": 28, "489706": 28, "286930": 28, "linear_155": 28, "389185": 28, "100321": 28, "963501": 28, "linear_157": 28, "982999": 28, "154796": 28, "637253": 28, "linear_156": 28, "537706": 28, "875190": 28, "linear_158": 28, "420287": 28, "502287": 28, "531588": 28, "linear_159": 28, "014746": 28, "423280": 28, "477261": 28, "linear_160": 28, "633553": 28, "715335": 28, "220921": 28, "linear_161": 28, "371849": 28, "117830": 28, "815203": 28, "linear_162": 28, "492933": 28, "126283": 28, "623318": 28, "linear_164": 28, "697504": 28, "825712": 28, "317358": 28, "linear_163": 28, "078367": 28, "008038": 28, "linear_165": 28, "023975": 28, "836278": 28, "577358": 28, "linear_166": 28, "860619": 28, "259792": 28, "493614": 28, "linear_167": 28, "380934": 28, "496160": 28, "107042": 28, "linear_168": 28, "691216": 28, "733317": 28, "831076": 28, "linear_169": 28, "723948": 28, "952728": 28, "129707": 28, "linear_171": 28, "034811": 28, "366547": 28, "665123": 28, "linear_170": 28, "356277": 28, "710501": 28, "linear_172": 28, "556884": 28, "729481": 28, "166058": 28, "linear_173": 28, "033039": 28, "207264": 28, "442120": 28, "linear_174": 28, "597379": 28, "658676": 28, "768131": 28, "linear_2": [28, 29], "293503": 28, "305265": 28, "877850": 28, "linear_1": [28, 29], "812222": 28, "766452": 28, "487047": 28, "linear_3": [28, 29], "999999": 28, "999755": 28, "031174": 28, "wish": [28, 29], "955k": 28, "18k": 28, "inparam": [28, 29], "inbin": [28, 29], "outparam": [28, 29], "outbin": [28, 29], "99m": 28, "78k": 28, "774k": [28, 29], "496": [28, 29, 43, 47], "replac": [28, 29], "774": [28, 29], "convolut": [28, 29, 48, 58, 61], "exact": [28, 29], "4x": [28, 29], "comparison": 28, "468000": [29, 33, 60], "lstm_transducer_stateless2": [29, 33, 60], "862": 29, "222": [29, 41, 43], "865": 29, "is_pnnx": 29, "62e404dd3f3a811d73e424199b3408e309c06e1a": [29, 30], "6d7a559": [29, 30], "feb": [29, 30, 40], "rnn_hidden_s": 29, "aux_layer_period": 29, "235": 29, "239": [29, 40], "472": 29, "324": 29, "83137520": 29, "596": 29, "325": 29, "257024": 29, "781812": 29, "327": 29, "84176356": 29, "182": [29, 30, 38, 47], "183": [29, 51, 52], "335": 29, "tracerwarn": [29, 30], "boolean": [29, 30], "caus": [29, 30, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "incorrect": [29, 30, 40], "flow": [29, 30], "constant": [29, 30], "futur": [29, 30, 40, 66], "need_pad": 29, "bool": 29, "259": [29, 38], "339": 29, "207": [29, 41, 43], "324m": 29, "321": [29, 38], "318m": 29, "159m": 29, "21k": 29, "861": 29, "266": [29, 30, 43, 47], "431": 29, "342": 29, "343": 29, "379": 29, "268": [29, 43, 47], "317m": 29, "317": 29, "conv_15": 29, "930708": 29, "972025": 29, "conv_16": 29, "978855": 29, "031788": 29, "456645": 29, "conv_17": 29, "868437": 29, "830528": 29, "218575": 29, "linear_18": 29, "107259": 29, "194808": 29, "293236": 29, "linear_19": 29, "193777": 29, "634748": 29, "401705": 29, "linear_20": 29, "259933": 29, "606617": 29, "722160": 29, "linear_21": 29, "186600": 29, "790260": 29, "512129": 29, "linear_22": 29, "759041": 29, "265832": 29, "050053": 29, "linear_23": 29, "931209": 29, "099090": 29, "979767": 29, "linear_24": 29, "324160": 29, "215561": 29, "321835": 29, "linear_25": 29, "800708": 29, "599352": 29, "284134": 29, "linear_26": 29, "492444": 29, "153369": 29, "274391": 29, "linear_27": 29, "660161": 29, "720994": 29, "674126": 29, "linear_28": 29, "415265": 29, "174434": 29, "007133": 29, "linear_29": 29, "038418": 29, "118534": 29, "724262": 29, "linear_30": 29, "072084": 29, "936867": 29, "259155": 29, "linear_31": 29, "342712": 29, "599489": 29, "282787": 29, "linear_32": 29, "340535": 29, "120308": 29, "701103": 29, "linear_33": 29, "846987": 29, "630030": 29, "985939": 29, "linear_34": 29, "686298": 29, "204571": 29, "607586": 29, "linear_35": 29, "904821": 29, "575518": 29, "756420": 29, "linear_36": 29, "806659": 29, "585589": 29, "118401": 29, "linear_37": 29, "402340": 29, "047157": 29, "162680": 29, "linear_38": 29, "174589": 29, "923361": 29, "030258": 29, "linear_39": 29, "178576": 29, "556058": 29, "807705": 29, "linear_40": 29, "901954": 29, "301267": 29, "956539": 29, "linear_41": 29, "839805": 29, "597429": 29, "716181": 29, "linear_42": 29, "178945": 29, "651595": 29, "895699": 29, "829245": 29, "627592": 29, "637907": 29, "746186": 29, "255032": 29, "167313": 29, "000000": 29, "999756": 29, "031013": 29, "345k": 29, "17k": 29, "218m": 29, "counterpart": 29, "bit": [29, 38, 40, 41, 43, 47, 54], "4532": 29, "feedforward": [30, 35, 40, 46, 61], "384": [30, 35, 43], "unmask": [30, 35], "downsampl": [30, 35, 39], "factor": [30, 35, 38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "473": [30, 43], "246": [30, 40, 43, 51, 52], "477": 30, "warm_step": 30, "2000": [30, 41], "feedforward_dim": 30, "attention_dim": [30, 38, 40, 43], "encoder_unmasked_dim": 30, "zipformer_downsampling_factor": 30, "decode_chunk_len": 30, "257": [30, 40, 51, 52], "023": 30, "zipformer2": 30, "419": 30, "At": [30, 38, 43], "stack": 30, "downsampling_factor": 30, "037": 30, "655": 30, "346": 30, "68944004": 30, "347": 30, "260096": 30, "348": [30, 51], "716276": 30, "656": [30, 43], "349": 30, "69920376": 30, "351": 30, "353": 30, "174": [30, 43], "175": 30, "1344": 30, "cached_len": 30, "num_lay": 30, "1348": 30, "cached_avg": 30, "1352": 30, "cached_kei": 30, "1356": 30, "cached_v": 30, "1360": 30, "cached_val2": 30, "1364": 30, "cached_conv1": 30, "1368": 30, "cached_conv2": 30, "1373": 30, "left_context_len": 30, "1884": 30, "x_size": 30, "2442": 30, "2449": 30, "2469": 30, "2473": 30, "2483": 30, "kv_len": 30, "2570": 30, "attn_output": 30, "bsz": 30, "num_head": 30, "seq_len": 30, "head_dim": 30, "2926": 30, "lorder": 30, "2652": 30, "2653": 30, "embed_dim": 30, "2666": 30, "1543": 30, "in_x_siz": 30, "1637": 30, "1643": 30, "in_channel": 30, "1571": 30, "1763": 30, "src1": 30, "src2": 30, "1779": 30, "dim1": 30, "1780": 30, "dim2": 30, "_trace": 30, "958": 30, "tracer": 30, "tupl": 30, "namedtupl": 30, "absolut": 30, "know": [30, 44], "side": 30, "allow": [30, 46, 61], "behavior": [30, 40], "_c": 30, "_create_method_from_trac": 30, "646": 30, "357": 30, "embedding_out": 30, "686": 30, "361": [30, 43, 47], "735": 30, "269m": 30, "269": [30, 38, 51, 52], "725": [30, 47], "1022k": 30, "266m": 30, "8m": 30, "509k": 30, "133m": 30, "152k": 30, "4m": 30, "1022": 30, "509": 30, "360": 30, "365": 30, "280": [30, 43], "372": [30, 38], "state": [30, 38, 40, 41, 43, 46, 48, 49, 56, 60, 61, 62], "410": 30, "411": [30, 43], "2028": 30, "2547": 30, "2029": 30, "23316": 30, "23317": 30, "23318": 30, "23319": 30, "23320": 30, "amount": [30, 37, 39], "pad": [30, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "conv2dsubsampl": 30, "arrai": 30, "23300": 30, "repo_url": 31, "basenam": 31, "why": 32, "streaming_asr": [32, 33, 60, 61, 62], "conv_emform": 32, "offline_asr": [32, 46], "baz": 33, "compact": 35, "inject": 35, "competit": 35, "full": [35, 36, 43, 44, 46, 48, 49, 60, 61, 62], "subset": [35, 36, 43, 46, 48, 49, 60, 61, 62], "instruct": [35, 36], "intial": [35, 36], "decode_gigaspeech": [35, 36], "1000": [35, 36, 43, 64, 65], "insert": 35, "residu": 35, "zipformer2encoderlay": 35, "remain": 35, "untouch": 35, "experi": [35, 36, 38, 40, 41, 43, 44, 46, 48, 49, 54, 60, 61, 62], "do_finetun": [35, 36], "use_adapt": 35, "adapter_dim": 35, "zipformer_adapt": 35, "world": [35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 56, 60, 61, 62, 64, 65], "exp_giga_finetune_adapt": 35, "_adapter_dim": 35, "045": 35, "13022": 35, "ckpt": [35, 36], "certain": [35, 36, 37], "bottleneck": 35, "notic": 35, "trainal": 35, "2024": [35, 64], "808": [35, 43, 51], "1277": 35, "761344": 35, "trainabl": 35, "entir": 35, "deactiv": 35, "keep": [35, 40, 46, 61, 62], "768": 35, "1536": 35, "queri": 35, "po": 35, "causal": [35, 61], "previou": [36, 56], "stateless": [36, 39, 42, 46, 60, 61, 62], "due": [36, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "vocabulari": [36, 40], "use_mux": 36, "exp_giga_finetun": 36, "_mux": 36, "0045": 36, "mux": 36, "13024": 36, "forget": 36, "quickli": 36, "mix": 36, "maintain": 36, "ones": 36, "lower": [36, 60], "public": 37, "capabl": 37, "high": [37, 39, 64], "label": 37, "1best": [38, 41, 43, 47, 48, 49, 51, 52], "automag": [38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "stop": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "By": [38, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "musan": [38, 41, 43, 44, 46, 48, 49, 60, 61, 62], "apt": [38, 41], "permiss": [38, 41], "commandlin": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "multi": [38, 40, 41, 43, 44, 46, 48, 49, 58, 60, 61, 62], "machin": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "ddp": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "implement": [38, 40, 41, 43, 44, 46, 48, 49, 58, 60, 61, 62], "utter": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "oom": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "decai": [38, 41, 43, 48, 49, 60], "warmup": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "function": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "get_param": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "directli": [38, 40, 41, 43, 44, 46, 48, 49, 60, 61, 62], "perturb": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "3x150": [38, 40, 41], "450": [38, 40, 41], "visual": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "logdir": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "labelsmooth": 38, "tensorflow": [38, 40, 41, 43, 46, 48, 49, 54, 60, 61], "press": [38, 40, 41, 43, 46, 48, 49, 54, 60, 61, 62], "ctrl": [38, 40, 41, 43, 46, 48, 49, 54, 60, 61, 62], "engw8ksktzqs24zbv5dgcg": 38, "2021": [38, 41, 43, 47, 51, 52, 54], "22t11": 38, "scan": [38, 40, 41, 43, 46, 54, 60, 61], "116068": 38, "scalar": [38, 40, 41, 43, 46, 54, 60, 61], "listen": [38, 40, 41, 46, 54, 60, 61], "xxxx": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "saw": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "consol": [38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "avoid": [38, 40, 43], "nbest": [38, 43, 49], "lattic": [38, 41, 43, 46, 47, 51, 52, 61, 62], "uniqu": [38, 43, 46, 61, 62], "pkufool": [38, 41, 47], "icefall_asr_aishell_conformer_ctc": 38, "transcrib": [38, 40, 41, 43], "lang_char": [38, 40], "bac009s0764w0121": [38, 40, 41], "bac009s0764w0122": [38, 40, 41], "bac009s0764w0123": [38, 40, 41], "tran": [38, 41, 43, 47, 51, 52], "conveni": [38, 41, 43, 44], "eo": [38, 41, 43], "soxi": [38, 40, 41, 43, 47, 54], "sampl": [38, 40, 41, 43, 47, 48, 54, 61, 62], "precis": [38, 40, 41, 43, 46, 47, 54, 61, 62], "67263": [38, 40, 41], "cdda": [38, 40, 41, 43, 47, 54], "sector": [38, 40, 41, 43, 47, 54], "135k": [38, 40, 41], "256k": [38, 40, 41, 43], "sign": [38, 40, 41, 43, 54], "integ": [38, 40, 41, 43, 54], "pcm": [38, 40, 41, 43, 54], "65840": [38, 40, 41], "308": [38, 40, 41], "625": [38, 40, 41], "132k": [38, 40, 41], "64000": [38, 40, 41], "300": [38, 40, 41, 43, 44, 46, 56, 61], "128k": [38, 40, 41, 54], "topologi": [38, 43], "num_decoder_lay": [38, 43], "vgg_frontend": [38, 40, 43], "use_feat_batchnorm": [38, 43], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 38, "sun": 38, "sep": 38, "33cfe45": 38, "d57a873": 38, "nov": [38, 43], "hw": 38, "kangwei": 38, "icefall_aishell3": 38, "k2_releas": 38, "tokens_fil": 38, "num_path": [38, 43, 46, 61, 62], "ngram_lm_scal": [38, 43], "attention_decoder_scal": [38, 43], "nbest_scal": [38, 43], "sos_id": [38, 43], "eos_id": [38, 43], "4336": [38, 40], "293": [38, 43], "369": [38, 43], "\u751a": [38, 40], "\u81f3": [38, 40], "\u51fa": [38, 40], "\u73b0": [38, 40], "\u4ea4": [38, 40], "\u6613": [38, 40], "\u51e0": [38, 40], "\u4e4e": [38, 40], "\u505c": [38, 40], "\u6b62": 38, "\u7684": [38, 40, 41], "\u60c5": [38, 40], "\u51b5": [38, 40], "\u4e00": [38, 40], "\u4e8c": [38, 40], "\u7ebf": [38, 40, 41], "\u57ce": [38, 40], "\u5e02": [38, 40], "\u867d": [38, 40], "\u7136": [38, 40], "\u4e5f": [38, 40, 41], "\u5904": [38, 40], "\u4e8e": [38, 40], "\u8c03": [38, 40], "\u6574": [38, 40], "\u4e2d": [38, 40, 41], "\u4f46": [38, 40, 41], "\u56e0": [38, 40], "\u4e3a": [38, 40], "\u805a": [38, 40], "\u96c6": [38, 40], "\u4e86": [38, 40, 41], "\u8fc7": [38, 40], "\u591a": [38, 40], "\u516c": [38, 40], "\u5171": [38, 40], "\u8d44": [38, 40], "\u6e90": [38, 40], "371": 38, "683": 38, "684": [38, 54], "651": [38, 54], "654": 38, "659": 38, "752": 38, "887": 38, "340": 38, "370": 38, "\u751a\u81f3": [38, 41], "\u51fa\u73b0": [38, 41], "\u4ea4\u6613": [38, 41], "\u51e0\u4e4e": [38, 41], "\u505c\u6b62": 38, "\u60c5\u51b5": [38, 41], "\u4e00\u4e8c": [38, 41], "\u57ce\u5e02": [38, 41], "\u867d\u7136": [38, 41], "\u5904\u4e8e": [38, 41], "\u8c03\u6574": [38, 41], "\u56e0\u4e3a": [38, 41], "\u805a\u96c6": [38, 41], "\u8fc7\u591a": [38, 41], "\u516c\u5171": [38, 41], "\u8d44\u6e90": [38, 41], "recor": [38, 43], "highest": [38, 43], "966": 38, "821": 38, "822": 38, "826": 38, "916": 38, "345": 38, "889": 38, "limit": [38, 40, 43, 58, 61], "upgrad": [38, 43], "checkout": [38, 43], "hlg_decod": [38, 43], "four": [38, 43], "messag": [38, 43, 46, 48, 49, 60, 61, 62], "use_gpu": [38, 43], "word_tabl": [38, 43], "forward": [38, 43, 48], "cu": [38, 43], "char": [38, 43], "693": [38, 51], "nnet_output": [38, 43], "185": [38, 43, 54], "217": [38, 43], "mandarin": 39, "beij": 39, "shell": 39, "technologi": 39, "ltd": 39, "peopl": 39, "accent": 39, "area": 39, "invit": 39, "particip": 39, "conduct": 39, "indoor": 39, "fidel": 39, "microphon": 39, "16khz": 39, "manual": 39, "profession": 39, "annot": 39, "inspect": 39, "free": [39, 44, 56, 60], "academ": 39, "moder": 39, "research": 39, "openslr": [39, 56], "conv1d": [40, 46, 60, 61, 62], "tanh": 40, "borrow": 40, "ieeexplor": 40, "ieee": 40, "jsp": 40, "arnumb": 40, "9054419": 40, "predict": [40, 44, 46, 60, 61, 62], "87939824": 40, "optimized_transduc": 40, "technqiu": 40, "maximum": 40, "emit": 40, "simplifi": [40, 58], "significantli": 40, "degrad": 40, "exactli": 40, "unprun": 40, "advantag": 40, "minim": 40, "pruned_transducer_stateless": [40, 46, 58, 61], "altern": 40, "though": 40, "transducer_stateless_modifi": 40, "pr": 40, "ram": 40, "tri": 40, "prob": [40, 60], "219": [40, 43], "lagz6hrcqxoigbfd5e0y3q": 40, "03t14": 40, "8477": 40, "250": [40, 47], "sym": [40, 46, 61, 62], "beam_search": [40, 46, 61, 62], "decoding_method": 40, "beam_4": 40, "ensur": 40, "poor": 40, "531": [40, 41], "994": [40, 43], "027": 40, "encoder_out_dim": 40, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 40, "50d2281": 40, "mar": 40, "0815224919": 40, "75d558775b": 40, "mmnv8": 40, "878": [40, 52], "880": 40, "891": 40, "userwarn": 40, "__floordiv__": 40, "round": 40, "toward": 40, "trunc": 40, "floor": 40, "div": 40, "rounding_mod": 40, "divis": 40, "x_len": 40, "\u6ede": 40, "322": 40, "759": 40, "760": 40, "919": 40, "922": 40, "929": 40, "046": 40, "319": [40, 43], "798": 40, "831": [40, 52], "215": [40, 43, 47], "402": 40, "topk_hyp_index": 40, "topk_index": 40, "logit": 40, "583": [40, 52], "lji9mwuorlow3jkdhxwk8a": 41, "13t11": 41, "4454": 41, "icefall_asr_aishell_tdnn_lstm_ctc": 41, "858": [41, 43], "389": [41, 43], "536": 41, "539": 41, "917": 41, "\u505c\u6ede": 41, "mmi": [42, 45], "blank": [42, 45], "skip": [42, 44, 45, 46, 60, 61, 62], "distil": [42, 45], "hubert": [42, 45], "ligru": [42, 50], "libri": [43, 44, 46, 48, 49, 60, 61, 62], "3x960": [43, 46, 48, 49, 60, 61, 62], "2880": [43, 46, 48, 49, 60, 61, 62], "lzgnetjwrxc3yghnmd4kpw": 43, "24t16": 43, "4540": 43, "sentenc": [43, 56], "piec": 43, "And": [43, 46, 48, 49, 60, 61, 62], "neither": 43, "nor": 43, "5000": 43, "033": 43, "537": 43, "full_libri": [43, 44], "464": 43, "548": 43, "776": 43, "652": [43, 54], "109226120": 43, "714": [43, 51], "206": 43, "944": 43, "1328": 43, "443": [43, 47], "2563": 43, "494": 43, "592": 43, "1715": 43, "52576": 43, "1424": 43, "807": 43, "506": 43, "362": 43, "1477": 43, "2922": 43, "4295": 43, "52343": 43, "396": 43, "3584": 43, "432": 43, "680": [43, 51], "_pickl": 43, "unpicklingerror": 43, "invalid": 43, "hlg_modifi": 43, "g_4_gram": [43, 47, 51, 52], "sentencepiec": 43, "875": [43, 47], "212k": 43, "267440": [43, 47], "1253": [43, 47], "535k": 43, "77200": [43, 47], "154k": 43, "554": 43, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 43, "8d93169": 43, "601": 43, "025": 43, "broffel": 43, "osom": 43, "723": 43, "775": 43, "881": 43, "571": 43, "857": 43, "979": 43, "055": 43, "051": 43, "363": 43, "959": [43, 52], "546": 43, "598": 43, "599": [43, 47], "833": 43, "834": 43, "915": 43, "076": 43, "397": 43, "999": [43, 46, 61, 62], "concaten": 43, "bucket": 43, "sampler": 43, "ctc_decod": 43, "ngram_lm_rescor": 43, "attention_rescor": 43, "228": 43, "543": 43, "topo": 43, "547": 43, "729": 43, "703": 43, "545": 43, "945": 43, "475": 43, "191": [43, 51, 52], "398": 43, "515": 43, "deseri": 43, "441": 43, "fsaclass": 43, "loadfsa": 43, "const": 43, "string": 43, "c10": 43, "ignor": 43, "589": 43, "attention_scal": 43, "188": 43, "984": 43, "624": 43, "519": [43, 52], "632": 43, "645": [43, 54], "243": 43, "303": 43, "179": 43, "knowledg": 44, "vector": 44, "mvq": 44, "kd": 44, "pruned_transducer_stateless4": [44, 46, 58, 61], "theoret": 44, "applic": 44, "minor": 44, "stop_stag": [44, 64, 65], "thing": 44, "distillation_with_hubert": 44, "Of": 44, "cours": 44, "xl": 44, "proce": 44, "960h": [44, 48], "use_extracted_codebook": 44, "augment": 44, "th": [44, 51, 52], "embedding_lay": 44, "num_codebook": 44, "under": [44, 56], "vq_fbank_layer36_cb8": 44, "whola": 44, "snippet": 44, "echo": 44, "awk": 44, "pruned_transducer_stateless6": 44, "12359": 44, "spec": 44, "warp": 44, "paid": 44, "suitabl": [46, 60, 61, 62], "pruned_transducer_stateless2": [46, 58, 61], "pruned_transducer_stateless5": [46, 58, 61], "scroll": [46, 48, 49, 60, 61, 62], "arxiv": [46, 60, 61, 62], "2206": [46, 60, 61, 62], "13236": [46, 60, 61, 62], "rework": [46, 58, 61], "daniel": [46, 61, 62], "joint": [46, 60, 61, 62], "contrari": [46, 60, 61, 62], "convent": [46, 60, 61, 62], "recurr": [46, 60, 61, 62], "2x": [46, 61, 62], "littl": [46, 61], "436000": [46, 48, 49, 60, 61, 62], "438000": [46, 48, 49, 60, 61, 62], "qogspbgsr8kzcrmmie9jgw": 46, "20t15": [46, 60, 61], "4468": [46, 60, 61], "210171": [46, 60, 61], "access": [46, 48, 49, 60, 61, 62], "googl": [46, 48, 49, 60, 61, 62], "6008": [46, 48, 49, 60, 61, 62], "localhost": [46, 48, 49, 60, 61, 62], "expos": [46, 48, 49, 60, 61, 62], "proxi": [46, 48, 49, 60, 61, 62], "bind_al": [46, 48, 49, 60, 61, 62], "fast_beam_search": [46, 48, 60, 61, 62], "474000": [46, 60, 61, 62], "largest": [46, 61, 62], "posterior": [46, 48, 61, 62], "algorithm": [46, 61, 62], "pdf": [46, 49, 61, 62], "1211": [46, 61, 62], "3711": [46, 61, 62], "espnet": [46, 61, 62], "net": [46, 61, 62], "beam_search_transduc": [46, 61, 62], "basic": [46, 61], "topk": [46, 61, 62], "expand": [46, 61, 62], "mode": [46, 61, 62], "being": [46, 61, 62], "hardcod": [46, 61, 62], "composit": [46, 61, 62], "hard": [46, 58, 61, 62], "2211": [46, 61, 62], "00484": [46, 61, 62], "fast_beam_search_lg": [46, 61, 62], "trivial": [46, 61, 62], "fast_beam_search_nbest": [46, 61, 62], "random_path": [46, 61, 62], "shortest": [46, 61, 62], "fast_beam_search_nbest_lg": [46, 61, 62], "logic": [46, 61, 62], "smallest": [46, 60, 61, 62], "normal": [47, 51, 52, 54, 61], "icefall_asr_librispeech_tdnn": 47, "lstm_ctc": 47, "flac": 47, "116k": 47, "140k": 47, "343k": 47, "164k": 47, "105k": 47, "174k": 47, "pretraind": 47, "584": [47, 52], "791": 47, "245": 47, "098": 47, "099": 47, "methond": [47, 51, 52], "631": 47, "010": 47, "guidanc": 48, "bigger": 48, "simpli": 48, "discard": 48, "prevent": 48, "lconv": 48, "encourag": [48, 49, 60], "stabil": [48, 49], "doesn": 48, "warm": [48, 49], "xyozukpeqm62hbilud4upa": [48, 49], "ctc_guide_decode_b": 48, "pretrained_ctc": 48, "jit_pretrained_ctc": 48, "100h": 48, "yfyeung": 48, "wechat": 49, "zipformer_mmi": 49, "worker": [49, 60], "hp": 49, "tdnn_ligru_ctc": 51, "enough": [51, 52, 54, 56], "luomingshuang": [51, 52], "icefall_asr_timit_tdnn_ligru_ctc": 51, "pretrained_average_9_25": 51, "fdhc0_si1559": [51, 52], "felc0_si756": [51, 52], "fmgd0_si1564": [51, 52], "ffprobe": [51, 52], "show_format": [51, 52], "nistspher": [51, 52], "database_id": [51, 52], "database_vers": [51, 52], "utterance_id": [51, 52], "dhc0_si1559": [51, 52], "sample_min": [51, 52], "4176": [51, 52], "sample_max": [51, 52], "5984": [51, 52], "bitrat": [51, 52], "pcm_s16le": [51, 52], "s16": [51, 52], "elc0_si756": [51, 52], "1546": [51, 52], "1989": [51, 52], "mgd0_si1564": [51, 52], "7626": [51, 52], "10573": [51, 52], "660": 51, "695": 51, "697": 51, "819": 51, "829": 51, "sil": [51, 52], "dh": [51, 52], "ih": [51, 52], "uw": [51, 52], "ah": [51, 52], "ii": [51, 52], "aa": [51, 52], "ei": [51, 52], "dx": [51, 52], "uh": [51, 52], "ng": [51, 52, 64], "eh": [51, 52], "jh": [51, 52], "er": [51, 52], "ai": [51, 52], "hh": [51, 52], "aw": 51, "ae": [51, 52], "705": 51, "715": 51, "720": 51, "251": [51, 52], "ch": 51, "icefall_asr_timit_tdnn_lstm_ctc": 52, "pretrained_average_16_25": 52, "816": 52, "827": 52, "unk": 52, "739": 52, "977": 52, "978": 52, "981": 52, "ow": 52, "ykubhb5wrmosxykid1z9eg": 54, "23t23": 54, "icefall_asr_yesno_tdnn": 54, "0_0_1_0_0_1_1_1": 54, "0_0_1_0_1_0_0_1": 54, "0_0_1_1_0_0_0_1": 54, "0_0_1_1_0_1_1_0": 54, "0_0_1_1_1_0_0_0": 54, "0_0_1_1_1_1_0_0": 54, "0_1_0_0_0_1_0_0": 54, "0_1_0_0_1_0_1_0": 54, "0_1_0_1_0_0_0_0": 54, "0_1_0_1_1_1_0_0": 54, "0_1_1_0_0_1_1_1": 54, "0_1_1_1_0_0_1_0": 54, "0_1_1_1_1_0_1_0": 54, "1_0_0_0_0_0_0_0": 54, "1_0_0_0_0_0_1_1": 54, "1_0_0_1_0_1_1_1": 54, "1_0_1_1_0_1_1_1": 54, "1_0_1_1_1_1_0_1": 54, "1_1_0_0_0_1_1_1": 54, "1_1_0_0_1_0_1_1": 54, "1_1_0_1_0_1_0_0": 54, "1_1_0_1_1_0_0_1": 54, "1_1_0_1_1_1_1_0": 54, "1_1_1_0_0_1_0_1": 54, "1_1_1_0_1_0_1_0": 54, "1_1_1_1_0_0_1_0": 54, "1_1_1_1_1_0_0_0": 54, "1_1_1_1_1_1_1_1": 54, "54080": 54, "507": 54, "108k": 54, "650": 54, "198": 54, "181": 54, "186": 54, "187": 54, "correctli": 54, "simplest": 54, "nnlm": 56, "complet": 56, "wget": [56, 64], "resourc": 56, "norm": 56, "gzip": 56, "prepare_lm_training_data": 56, "lm_data": 56, "grab": 56, "cup": 56, "coffe": 56, "sort_lm_training_data": 56, "sorted_lm_data": 56, "statist": 56, "lm_data_stat": 56, "aforement": 56, "repeat": 56, "rnn_lm": 56, "tie": 56, "hyper": [56, 64, 65], "coupl": [56, 64, 65], "dai": [56, 64, 65], "former": 58, "mask": [58, 61, 62], "wenet": 58, "did": 58, "request": 58, "complic": 58, "techniqu": 58, "bank": 58, "memor": 58, "histori": 58, "introduc": 58, "variant": 58, "pruned_stateless_emformer_rnnt2": 58, "conv_emformer_transducer_stateless": 58, "ourself": 58, "mechan": 58, "onlin": 60, "lstm_transducer_stateless": 60, "prepare_giga_speech": 60, "cj2vtpiwqhkn9q1tx6ptpg": 60, "dynam": [61, 62], "short": [61, 62], "2012": 61, "05481": 61, "flag": 61, "indic": [61, 62], "whether": 61, "uniformli": [61, 62], "seen": [61, 62], "97vkxf80ru61cnp2alwzzg": 61, "streaming_decod": [61, 62], "wise": [61, 62], "parallel": [61, 62], "bath": [61, 62], "parallelli": [61, 62], "seem": 61, "benefit": 61, "320m": 62, "550": 62, "basicli": 62, "scriptmodul": 62, "jit_trace_export": 62, "jit_trace_pretrain": 62, "monoton": 63, "condit": [64, 65], "variat": [64, 65], "autoencod": [64, 65], "adversari": [64, 65], "piper_phonem": 64, "numba": 64, "espnet_tts_frontend": 64, "monotonic_align": [64, 65], "build_ext": [64, 65], "inplac": [64, 65], "medium": 64, "ground": [64, 65], "truth": [64, 65], "test_onnx": [64, 65], "program": 64, "kotlin": 64, "java": 64, "swift": 64, "offlin": 64, "espeak": 64, "bz2": 64, "xf": 64, "thread": 64, "countri": 64, "plai": 64, "350": 65, "zrjin": 65, "synthesi": 66, "task": 66}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": [0, 9], "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 26, 32, 33], "creat": [2, 13, 20, 25], "recip": [2, 66], "data": [2, 9, 11, 20, 25, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "prepar": [2, 9, 11, 20, 25, 35, 36, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "train": [2, 9, 16, 22, 25, 28, 29, 30, 31, 35, 36, 37, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "decod": [2, 5, 6, 7, 9, 12, 25, 26, 31, 35, 38, 40, 41, 43, 44, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "pre": [2, 22, 28, 29, 30, 31, 35, 36, 37, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62], "model": [2, 5, 15, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 56, 60, 61, 62, 64, 65], "lodr": [4, 6], "rnn": [4, 55, 56], "transduc": [4, 6, 7, 28, 29, 30, 40, 46, 60, 61, 62], "wer": [4, 6, 7, 43], "differ": [4, 6, 7, 17], "beam": [4, 6, 7, 40], "size": [4, 6, 7], "languag": [5, 56], "lm": [6, 43, 55], "rescor": [6, 38, 43], "base": [6, 18, 19, 20], "method": 6, "v": 6, "shallow": [6, 7], "fusion": [6, 7], "The": [6, 40], "number": 6, "each": [6, 20], "field": 6, "i": 6, "test": [6, 7, 20, 25, 28, 29, 30], "clean": [6, 7], "other": 6, "time": [6, 7], "docker": [8, 9], "introduct": [9, 58], "view": 9, "avail": 9, "tag": 9, "cuda": [9, 25], "enabl": 9, "imag": 9, "cpu": 9, "onli": 9, "download": [9, 11, 25, 28, 29, 30, 31, 38, 40, 41, 43, 46, 47, 48, 49, 51, 52, 54, 60, 61, 62, 64, 65], "run": [9, 26, 64], "gpu": 9, "yesno": [9, 53], "within": 9, "contain": 9, "updat": 9, "frequent": 10, "ask": 10, "question": 10, "faq": 10, "oserror": 10, "libtorch_hip": 10, "so": 10, "cannot": 10, "open": 10, "share": 10, "object": 10, "file": [10, 11, 20, 31, 64], "directori": 10, "attributeerror": 10, "modul": 10, "distutil": 10, "ha": 10, "attribut": 10, "version": 10, "importerror": 10, "libpython3": 10, "10": 10, "1": [10, 25, 28, 29, 30, 38, 40, 41, 43], "0": [10, 25], "No": 10, "For": [11, 12, 13, 15, 16], "more": [11, 12, 13, 15, 16], "curiou": [11, 12, 13, 15, 16], "A": 11, "quick": 11, "look": 11, "gener": [11, 20], "environ": [13, 20, 25], "setup": 13, "virtual": [13, 25], "instal": [13, 25, 28, 29, 30, 38, 40, 41, 43, 47, 51, 52, 64], "depend": [13, 64], "icefal": [13, 14, 24, 25, 28, 29, 30], "dummi": 14, "tutori": 14, "export": [15, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 46, 48, 49, 60, 61, 62, 64, 65], "paramet": 15, "via": [15, 28, 29, 30], "state_dict": [15, 26, 46, 48, 49, 60, 61, 62], "torch": [15, 25, 28, 29, 30, 32, 33, 46, 48, 49, 60, 61, 62], "jit": [15, 28, 29, 30, 32, 33, 46, 48, 49, 60, 61, 62], "script": [15, 32, 46, 48, 49, 61, 62], "onnx": [15, 31, 64], "two": 17, "approach": 17, "between": 17, "fst": [18, 20], "forc": [18, 19, 20], "align": [18, 19, 20, 64, 65], "content": [18, 24, 37, 66], "k2": [19, 25], "kaldi": 20, "get": 20, "comput": [20, 43], "log_prob": 20, "token2id": 20, "id2token": 20, "word2id": 20, "id2word": 20, "lexicon": [20, 64], "relat": 20, "convert": 20, "transcript": 20, "an": [20, 56], "graph": 20, "segment": 20, "word": 20, "us": [20, 26, 32, 33, 46, 48, 49, 60, 61, 62], "summari": 20, "huggingfac": [21, 23], "space": 23, "youtub": [23, 25], "video": [23, 25], "toolkit": 25, "cudnn": 25, "torchaudio": 25, "2": [25, 28, 29, 30, 38, 40, 41, 43], "3": [25, 28, 29, 30, 38, 40, 43], "lhots": 25, "4": [25, 28, 29, 30], "exampl": [25, 31, 38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "5": [25, 28, 29, 30], "6": [25, 28, 29, 30], "your": 25, "when": [26, 32, 33], "py": 26, "ncnn": [27, 28, 29, 30], "convemform": 28, "pnnx": [28, 29, 30], "trace": [28, 29, 30, 33, 60, 62], "torchscript": [28, 29, 30], "modifi": [28, 29, 30, 40], "encod": [28, 29, 30], "sherpa": [28, 29, 30, 31, 46, 61, 62, 64], "7": [28, 29], "option": [28, 29, 38, 41, 43, 46, 48, 49, 60, 61, 62], "int8": [28, 29], "quantiz": [28, 29], "lstm": [29, 41, 47, 52, 60], "stream": [30, 42, 57, 58, 61, 62], "zipform": [30, 35, 36, 48, 49, 62], "sound": 31, "finetun": [35, 36], "from": [35, 36], "adapt": 35, "fine": [35, 36, 37], "tune": [35, 36, 37], "supervis": 36, "tabl": [37, 66], "conform": [38, 43, 58], "ctc": [38, 41, 43, 47, 48, 51, 52, 54], "configur": [38, 41, 43, 46, 48, 49, 60, 61, 62], "log": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62], "usag": [38, 40, 41, 43, 46, 48, 49, 60, 61, 62, 64], "case": [38, 40, 41, 43], "kaldifeat": [38, 40, 41, 43, 47, 51, 52, 54], "hlg": [38, 41, 43], "attent": [38, 43], "colab": [38, 40, 41, 43, 47, 51, 52, 54], "notebook": [38, 40, 41, 43, 47, 51, 52, 54], "deploy": [38, 43], "c": [38, 43], "aishel": 39, "stateless": 40, "loss": 40, "todo": 40, "greedi": 40, "search": [40, 64, 65], "tdnn": [41, 47, 51, 52, 54], "non": 42, "asr": [42, 57], "n": 43, "gram": 43, "distil": 44, "hubert": 44, "codebook": 44, "index": 44, "librispeech": [45, 59], "prune": [46, 61], "statelessx": [46, 61], "pretrain": [46, 48, 49, 60, 61, 62, 64, 65], "deploi": [46, 61, 62], "infer": [47, 51, 52, 54, 64, 65], "blank": 48, "skip": 48, "mmi": 49, "timit": 50, "ligru": 51, "emform": 58, "which": 60, "simul": [61, 62], "real": [61, 62], "tt": 63, "vit": [64, 65], "ljspeech": 64, "extra": 64, "build": [64, 65], "monoton": [64, 65], "vctk": 65}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 58}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [11, "data-preparation"], [40, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [16, "training"], [25, "training"], [38, "training"], [40, "training"], [41, "training"], [43, "training"], [44, "training"], [46, "training"], [47, "training"], [48, "training"], [49, "training"], [51, "training"], [52, "training"], [54, "training"], [60, "training"], [61, "training"], [62, "training"], [64, "training"], [65, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [12, "decoding"], [25, "decoding"], [35, "decoding"], [38, "decoding"], [40, "decoding"], [41, "decoding"], [43, "decoding"], [44, "decoding"], [46, "decoding"], [47, "decoding"], [48, "decoding"], [49, "decoding"], [51, "decoding"], [52, "decoding"], [54, "decoding"], [60, "decoding"], [61, "decoding"], [62, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "LODR for RNN Transducer": [[4, "lodr-for-rnn-transducer"]], "WER of LODR with different beam sizes": [[4, "id1"]], "Decoding with language models": [[5, "decoding-with-language-models"]], "LM rescoring for Transducer": [[6, "lm-rescoring-for-transducer"]], "WERs of LM rescoring with different beam sizes": [[6, "id1"]], "WERs of LM rescoring + LODR with different beam sizes": [[6, "id2"]], "LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean)": [[6, "id3"]], "Shallow fusion for Transducer": [[7, "shallow-fusion-for-transducer"]], "WERs and decoding time (on test-clean) of shallow fusion with different beam sizes": [[7, "id2"]], "Docker": [[8, "docker"]], "Introduction": [[9, "introduction"], [58, "introduction"]], "View available tags": [[9, "view-available-tags"]], "CUDA-enabled docker images": [[9, "cuda-enabled-docker-images"]], "CPU-only docker images": [[9, "cpu-only-docker-images"]], "Download a docker image (CUDA)": [[9, "download-a-docker-image-cuda"]], "Download a docker image (CPU)": [[9, "download-a-docker-image-cpu"]], "Run a docker image with GPU": [[9, "run-a-docker-image-with-gpu"]], "Run a docker image with CPU": [[9, "run-a-docker-image-with-cpu"]], "Run yesno within a docker container": [[9, "run-yesno-within-a-docker-container"]], "Update the code": [[9, "update-the-code"]], "Data preparation": [[9, "data-preparation"], [25, "data-preparation"], [35, "data-preparation"], [36, "data-preparation"], [38, "data-preparation"], [41, "data-preparation"], [43, "data-preparation"], [44, "data-preparation"], [46, "data-preparation"], [47, "data-preparation"], [48, "data-preparation"], [49, "data-preparation"], [51, "data-preparation"], [52, "data-preparation"], [54, "data-preparation"], [60, "data-preparation"], [61, "data-preparation"], [62, "data-preparation"], [64, "data-preparation"], [65, "data-preparation"]], "Frequently Asked Questions (FAQs)": [[10, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[10, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[10, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[10, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "For the more curious": [[11, "for-the-more-curious"], [12, "for-the-more-curious"], [13, "for-the-more-curious"], [15, "for-the-more-curious"], [16, "for-the-more-curious"]], "A quick look to the generated files": [[11, "a-quick-look-to-the-generated-files"]], "download": [[11, "download"]], "data": [[11, "data"]], "Environment setup": [[13, "environment-setup"]], "Create a virtual environment": [[13, "create-a-virtual-environment"]], "Install dependencies": [[13, "install-dependencies"]], "Install icefall": [[13, "install-icefall"]], "Icefall for dummies tutorial": [[14, "icefall-for-dummies-tutorial"]], "Model Export": [[15, "model-export"]], "Export the model parameters via model.state_dict()": [[15, "export-the-model-parameters-via-model-state-dict"]], "Export via torch.jit.script()": [[15, "export-via-torch-jit-script"]], "Export via torch.onnx.export()": [[15, "export-via-torch-onnx-export"]], "Two approaches": [[17, "two-approaches"]], "Differences between the two approaches": [[17, "differences-between-the-two-approaches"]], "FST-based forced alignment": [[18, "fst-based-forced-alignment"]], "Contents:": [[18, null], [24, null]], "k2-based forced alignment": [[19, "k2-based-forced-alignment"]], "Kaldi-based forced alignment": [[20, "kaldi-based-forced-alignment"]], "Prepare the environment": [[20, "prepare-the-environment"]], "Get the test data": [[20, "get-the-test-data"]], "Compute log_probs": [[20, "compute-log-probs"]], "Create token2id and id2token": [[20, "create-token2id-and-id2token"]], "Create word2id and id2word": [[20, "create-word2id-and-id2word"]], "Generate lexicon-related files": [[20, "generate-lexicon-related-files"]], "Convert transcript to an FST graph": [[20, "convert-transcript-to-an-fst-graph"]], "Force aligner": [[20, "force-aligner"]], "Segment each word using the computed alignments": [[20, "segment-each-word-using-the-computed-alignments"]], "Summary": [[20, "summary"]], "Huggingface": [[21, "huggingface"]], "Pre-trained models": [[22, "pre-trained-models"]], "Huggingface spaces": [[23, "huggingface-spaces"]], "YouTube Video": [[23, "youtube-video"], [25, "youtube-video"]], "Icefall": [[24, "icefall"]], "Installation": [[25, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[25, "install-cuda-toolkit-and-cudnn"]], "(1) Install torch and torchaudio": [[25, "install-torch-and-torchaudio"]], "(2) Install k2": [[25, "install-k2"]], "(3) Install lhotse": [[25, "install-lhotse"]], "(4) Download icefall": [[25, "download-icefall"]], "Installation example": [[25, "installation-example"]], "(1) Create a virtual environment": [[25, "create-a-virtual-environment"]], "(2) Install CUDA toolkit and cuDNN": [[25, "id1"]], "(3) Install torch and torchaudio": [[25, "id2"]], "(4) Install k2": [[25, "id3"]], "(5) Install lhotse": [[25, "id5"]], "(6) Download icefall": [[25, "id6"]], "Test Your Installation": [[25, "test-your-installation"]], "Export model.state_dict()": [[26, "export-model-state-dict"], [46, "export-model-state-dict"], [48, "export-model-state-dict"], [49, "export-model-state-dict"], [60, "export-model-state-dict"], [61, "export-model-state-dict"], [62, "export-model-state-dict"]], "When to use it": [[26, "when-to-use-it"], [32, "when-to-use-it"], [33, "when-to-use-it"]], "How to export": [[26, "how-to-export"], [32, "how-to-export"], [33, "how-to-export"]], "How to use the exported model": [[26, "how-to-use-the-exported-model"], [32, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[26, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[27, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[28, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[28, "download-the-pre-trained-model"], [29, "download-the-pre-trained-model"], [30, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[28, "install-ncnn-and-pnnx"], [29, "install-ncnn-and-pnnx"], [30, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[28, "export-the-model-via-torch-jit-trace"], [29, "export-the-model-via-torch-jit-trace"], [30, "export-the-model-via-torch-jit-trace"]], "4. Export torchscript model via pnnx": [[28, "export-torchscript-model-via-pnnx"], [29, "export-torchscript-model-via-pnnx"], [30, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[28, "test-the-exported-models-in-icefall"], [29, "test-the-exported-models-in-icefall"], [30, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[28, "modify-the-exported-encoder-for-sherpa-ncnn"], [29, "modify-the-exported-encoder-for-sherpa-ncnn"], [30, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[28, "optional-int8-quantization-with-sherpa-ncnn"], [29, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[29, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[30, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[31, "export-to-onnx"]], "sherpa-onnx": [[31, "sherpa-onnx"]], "Example": [[31, "example"]], "Download the pre-trained model": [[31, "download-the-pre-trained-model"], [38, "download-the-pre-trained-model"], [40, "download-the-pre-trained-model"], [41, "download-the-pre-trained-model"], [43, "download-the-pre-trained-model"], [47, "download-the-pre-trained-model"], [51, "download-the-pre-trained-model"], [52, "download-the-pre-trained-model"], [54, "download-the-pre-trained-model"]], "Export the model to ONNX": [[31, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[31, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[32, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[33, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[33, "how-to-use-the-exported-models"]], "Model export": [[34, "model-export"]], "Finetune from a pre-trained Zipformer model with adapters": [[35, "finetune-from-a-pre-trained-zipformer-model-with-adapters"]], "Model preparation": [[35, "model-preparation"], [36, "model-preparation"]], "Fine-tune with adapter": [[35, "fine-tune-with-adapter"]], "Export the model": [[35, "export-the-model"]], "Finetune from a supervised pre-trained Zipformer model": [[36, "finetune-from-a-supervised-pre-trained-zipformer-model"]], "Fine-tune": [[36, "fine-tune"]], "Fine-tune a pre-trained model": [[37, "fine-tune-a-pre-trained-model"]], "Table of Contents": [[37, null], [66, null]], "Conformer CTC": [[38, "conformer-ctc"], [43, "conformer-ctc"]], "Configurable options": [[38, "configurable-options"], [41, "configurable-options"], [43, "configurable-options"], [46, "configurable-options"], [48, "configurable-options"], [49, "configurable-options"], [60, "configurable-options"], [61, "configurable-options"], [62, "configurable-options"]], "Pre-configured options": [[38, "pre-configured-options"], [41, "pre-configured-options"], [43, "pre-configured-options"], [46, "pre-configured-options"], [48, "pre-configured-options"], [49, "pre-configured-options"], [60, "pre-configured-options"], [61, "pre-configured-options"], [62, "pre-configured-options"]], "Training logs": [[38, "training-logs"], [40, "training-logs"], [41, "training-logs"], [43, "training-logs"], [46, "training-logs"], [48, "training-logs"], [49, "training-logs"], [60, "training-logs"], [61, "training-logs"], [62, "training-logs"]], "Usage examples": [[38, "usage-examples"], [40, "usage-examples"], [41, "usage-examples"], [43, "usage-examples"]], "Case 1": [[38, "case-1"], [40, "case-1"], [41, "case-1"], [43, "case-1"]], "Case 2": [[38, "case-2"], [40, "case-2"], [41, "case-2"], [43, "case-2"]], "Case 3": [[38, "case-3"], [40, "case-3"], [43, "case-3"]], "Pre-trained Model": [[38, "pre-trained-model"], [40, "pre-trained-model"], [41, "pre-trained-model"], [43, "pre-trained-model"], [47, "pre-trained-model"], [51, "pre-trained-model"], [52, "pre-trained-model"], [54, "pre-trained-model"]], "Install kaldifeat": [[38, "install-kaldifeat"], [40, "install-kaldifeat"], [41, "install-kaldifeat"], [43, "install-kaldifeat"], [47, "install-kaldifeat"], [51, "install-kaldifeat"], [52, "install-kaldifeat"]], "Usage": [[38, "usage"], [40, "usage"], [41, "usage"], [43, "usage"]], "CTC decoding": [[38, "ctc-decoding"], [43, "ctc-decoding"], [43, "id2"]], "HLG decoding": [[38, "hlg-decoding"], [38, "id2"], [41, "hlg-decoding"], [43, "hlg-decoding"], [43, "id3"]], "HLG decoding + attention decoder rescoring": [[38, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[38, "colab-notebook"], [40, "colab-notebook"], [41, "colab-notebook"], [43, "colab-notebook"], [47, "colab-notebook"], [51, "colab-notebook"], [52, "colab-notebook"], [54, "colab-notebook"]], "Deployment with C++": [[38, "deployment-with-c"], [43, "deployment-with-c"]], "aishell": [[39, "aishell"]], "Stateless Transducer": [[40, "stateless-transducer"]], "The Model": [[40, "the-model"]], "The Loss": [[40, "the-loss"]], "Todo": [[40, "id1"]], "Greedy search": [[40, "greedy-search"]], "Beam search": [[40, "beam-search"]], "Modified Beam search": [[40, "modified-beam-search"]], "TDNN-LSTM CTC": [[41, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[42, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[43, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[43, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[43, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[43, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[43, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[44, "distillation-with-hubert"]], "Codebook index preparation": [[44, "codebook-index-preparation"]], "LibriSpeech": [[45, "librispeech"], [59, "librispeech"]], "Pruned transducer statelessX": [[46, "pruned-transducer-statelessx"], [61, "pruned-transducer-statelessx"]], "Usage example": [[46, "usage-example"], [48, "usage-example"], [49, "usage-example"], [60, "usage-example"], [61, "usage-example"], [62, "usage-example"]], "Export Model": [[46, "export-model"], [61, "export-model"], [62, "export-model"]], "Export model using torch.jit.script()": [[46, "export-model-using-torch-jit-script"], [48, "export-model-using-torch-jit-script"], [49, "export-model-using-torch-jit-script"], [61, "export-model-using-torch-jit-script"], [62, "export-model-using-torch-jit-script"]], "Download pretrained models": [[46, "download-pretrained-models"], [48, "download-pretrained-models"], [49, "download-pretrained-models"], [60, "download-pretrained-models"], [61, "download-pretrained-models"], [62, "download-pretrained-models"], [64, "download-pretrained-models"], [65, "download-pretrained-models"]], "Deploy with Sherpa": [[46, "deploy-with-sherpa"], [61, "deploy-with-sherpa"], [62, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[47, "tdnn-lstm-ctc"], [52, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[47, "inference-with-a-pre-trained-model"], [51, "inference-with-a-pre-trained-model"], [52, "inference-with-a-pre-trained-model"], [54, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[48, "zipformer-ctc-blank-skip"]], "Export models": [[48, "export-models"], [49, "export-models"], [60, "export-models"], [64, "export-models"], [65, "export-models"]], "Zipformer MMI": [[49, "zipformer-mmi"]], "TIMIT": [[50, "timit"]], "TDNN-LiGRU-CTC": [[51, "tdnn-ligru-ctc"]], "YesNo": [[53, "yesno"]], "TDNN-CTC": [[54, "tdnn-ctc"]], "Download kaldifeat": [[54, "download-kaldifeat"]], "RNN-LM": [[55, "rnn-lm"]], "Train an RNN language model": [[56, "train-an-rnn-language-model"]], "Streaming ASR": [[57, "streaming-asr"]], "Streaming Conformer": [[58, "streaming-conformer"]], "Streaming Emformer": [[58, "streaming-emformer"]], "LSTM Transducer": [[60, "lstm-transducer"]], "Which model to use": [[60, "which-model-to-use"]], "Export model using torch.jit.trace()": [[60, "export-model-using-torch-jit-trace"], [62, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[61, "simulate-streaming-decoding"], [62, "simulate-streaming-decoding"]], "Real streaming decoding": [[61, "real-streaming-decoding"], [62, "real-streaming-decoding"]], "Zipformer Transducer": [[62, "zipformer-transducer"]], "TTS": [[63, "tts"]], "VITS-LJSpeech": [[64, "vits-ljspeech"]], "Install extra dependencies": [[64, "install-extra-dependencies"]], "Build Monotonic Alignment Search": [[64, "build-monotonic-alignment-search"], [65, "build-monotonic-alignment-search"]], "Inference": [[64, "inference"], [65, "inference"]], "Usage in sherpa-onnx": [[64, "usage-in-sherpa-onnx"]], "Install sherpa-onnx": [[64, "install-sherpa-onnx"]], "Download lexicon files": [[64, "download-lexicon-files"]], "Run sherpa-onnx": [[64, "run-sherpa-onnx"]], "VITS-VCTK": [[65, "vits-vctk"]], "Recipes": [[66, "recipes"]]}, "indexentries": {}})
\ No newline at end of file