mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 10:02:22 +00:00
1 line
106 KiB
JavaScript
1 line
106 KiB
JavaScript
Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "decoding-with-langugage-models/LODR", "decoding-with-langugage-models/index", "decoding-with-langugage-models/rescoring", "decoding-with-langugage-models/shallow-fusion", "faqs", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "decoding-with-langugage-models/LODR.rst", "decoding-with-langugage-models/index.rst", "decoding-with-langugage-models/rescoring.rst", "decoding-with-langugage-models/shallow-fusion.rst", "faqs.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "LODR for RNN Transducer", "Decoding with language models", "LM rescoring for Transducer", "Shallow fusion for Transducer", "Frequently Asked Questions (FAQs)", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45, 46], "us": [0, 1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 19, 22, 23, 24, 25, 26, 28, 29, 32, 36, 37, 39, 41], "tool": [0, 8, 13, 16], "make": [0, 1, 3, 16, 17, 18, 23, 25, 28, 41], "consist": [0, 25, 31, 43, 44, 45], "possibl": [0, 2, 3, 23, 28], "black": 0, "format": [0, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "flake8": 0, "check": [0, 13, 28], "qualiti": [0, 24], "isort": 0, "sort": [0, 13], "import": [0, 8, 13, 16, 44, 45], "The": [0, 1, 2, 4, 7, 8, 11, 13, 14, 16, 17, 18, 23, 24, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "version": [0, 12, 13, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 36, 37, 44], "abov": [0, 4, 6, 7, 8, 14, 16, 17, 18, 19, 23, 24, 25, 26, 28, 31, 33, 34, 39, 41, 43, 44, 45], "ar": [0, 1, 3, 4, 5, 6, 7, 8, 13, 14, 16, 17, 18, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45, 46], "22": [0, 13, 16, 17, 28, 36, 37, 39], "3": [0, 4, 6, 7, 8, 12, 14, 15, 19, 22, 26, 29, 31, 32, 33, 34, 39, 43, 44, 45], "0": [0, 1, 4, 6, 7, 12, 14, 16, 17, 18, 19, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "5": [0, 7, 15, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "4": [0, 4, 5, 6, 7, 8, 12, 14, 15, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "10": [0, 7, 12, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "1": [0, 4, 6, 7, 12, 14, 15, 19, 20, 21, 22, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "after": [0, 1, 6, 11, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "run": [0, 2, 8, 11, 13, 16, 17, 18, 19, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "command": [0, 1, 4, 6, 7, 8, 13, 14, 16, 17, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "git": [0, 4, 6, 7, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 32, 36, 37, 39], "clone": [0, 4, 6, 7, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 32, 36, 37, 39], "http": [0, 1, 2, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "github": [0, 2, 6, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "com": [0, 2, 6, 10, 11, 13, 14, 16, 17, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "k2": [0, 2, 8, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 43, 44, 45], "fsa": [0, 2, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 28, 31, 33, 34, 43, 44, 45], "icefal": [0, 2, 3, 4, 6, 7, 8, 10, 11, 14, 15, 19, 20, 21, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45, 46], "cd": [0, 1, 2, 8, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "pip": [0, 1, 6, 8, 13, 16, 19, 25], "instal": [0, 1, 4, 6, 8, 9, 11, 12, 14, 15, 19, 22, 29, 31, 33, 34, 39, 43, 44, 45], "pre": [0, 3, 4, 6, 7, 9, 11, 12, 13, 15, 22, 29], "commit": [0, 13], "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 8, 10, 11, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "automat": [0, 11, 29], "hook": 0, "invok": 0, "fail": 0, "If": [0, 2, 4, 6, 7, 8, 11, 16, 17, 18, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "ani": [0, 4, 6, 7, 13, 23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44], "your": [0, 1, 2, 4, 6, 7, 9, 11, 12, 16, 17, 18, 19, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "wa": [0, 14, 28, 32], "success": [0, 13, 16, 17], "pleas": [0, 1, 2, 4, 6, 7, 8, 11, 13, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "fix": [0, 8, 16, 17, 18, 28], "issu": [0, 4, 6, 7, 8, 13, 16, 17, 28, 29, 44, 45], "report": [0, 8, 29], "some": [0, 1, 4, 6, 14, 16, 17, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "i": [0, 1, 2, 4, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "e": [0, 2, 4, 5, 6, 7, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "modifi": [0, 15, 22, 23, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "file": [0, 2, 11, 12, 14, 16, 17, 18, 20, 21, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "place": [0, 13, 14, 25, 28, 32], "so": [0, 4, 6, 7, 11, 12, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "statu": 0, "failur": 0, "see": [0, 1, 6, 7, 11, 13, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "which": [0, 2, 4, 6, 7, 11, 13, 14, 16, 17, 18, 19, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 44, 45], "ha": [0, 2, 12, 13, 15, 16, 17, 18, 19, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 41, 43, 44, 45], "been": [0, 13, 15, 16, 17, 18, 25], "befor": [0, 1, 13, 14, 16, 17, 18, 19, 20, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "further": [0, 4, 6, 7], "chang": [0, 4, 6, 7, 8, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "all": [0, 10, 11, 14, 16, 17, 18, 20, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "again": [0, 16, 17, 39], "should": [0, 2, 4, 6, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "succe": 0, "thi": [0, 2, 3, 4, 5, 6, 7, 8, 9, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45, 46], "time": [0, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "succeed": 0, "want": [0, 4, 6, 7, 13, 14, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "can": [0, 1, 2, 4, 6, 7, 8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "do": [0, 2, 4, 6, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "Or": 0, "without": [0, 4, 6, 7, 9, 11, 23, 28], "your_changed_fil": 0, "py": [0, 2, 4, 6, 7, 8, 13, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 4, 6, 7, 10, 11, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "prepar": [1, 3, 4, 14], "environ": [1, 8, 16, 17, 18, 23, 24, 25, 26, 28, 29, 31, 32, 36, 37, 39, 44, 45], "doc": [1, 14, 41], "r": [1, 13, 16, 17, 18, 36, 37], "requir": [1, 4, 6, 13, 18, 29, 44, 45], "txt": [1, 4, 13, 16, 17, 18, 19, 23, 25, 26, 28, 32, 36, 37, 39], "set": [1, 4, 6, 7, 8, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44, 45], "up": [1, 13, 14, 16, 17, 18, 23, 26, 28, 29, 31, 32, 33, 34, 44, 45], "readi": [1, 23, 28, 29], "refer": [1, 2, 6, 7, 13, 14, 15, 16, 17, 18, 20, 21, 23, 25, 26, 28, 31, 32, 33, 36, 37, 39, 41, 44, 45], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 13, 14, 16, 17, 18, 23, 25, 28], "local": [1, 13, 31, 33, 34, 43, 44, 45], "preview": 1, "what": [1, 2, 13, 16, 17, 18, 25, 41], "look": [1, 2, 4, 6, 7, 10, 13, 16, 17, 18, 23, 25, 26, 28, 29], "like": [1, 2, 11, 16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 39, 41, 43, 44], "publish": [1, 14, 24], "html": [1, 2, 8, 13, 15, 16, 17, 18, 19, 20, 21, 31, 43, 44, 45], "gener": [1, 6, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "view": [1, 16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 39, 43, 44, 45], "follow": [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "python3": [1, 8, 13, 17, 18], "m": [1, 13, 16, 17, 18, 25, 31, 33, 34, 36, 37, 43, 44, 45], "server": [1, 11, 43], "It": [1, 2, 6, 7, 9, 13, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "print": [1, 13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "serv": [1, 31, 33, 34, 43, 44, 45], "port": [1, 29, 31, 33, 34, 43, 44, 45], "8000": [1, 39], "open": [1, 4, 6, 7, 12, 14, 16, 17, 18, 24, 25, 28, 29], "browser": [1, 9, 11, 31, 33, 34, 43, 44, 45], "go": [1, 7, 23, 25, 28, 31, 33, 34, 43, 44, 45], "read": [2, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "code": [2, 3, 8, 12, 13, 16, 17, 18, 23, 28, 29, 31, 32, 36, 37, 39, 41, 44, 45], "style": [2, 3, 12], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 28, 31, 33, 34, 43, 44, 45], "recommend": [2, 6, 7, 13, 23, 25, 26, 28, 29, 31, 44, 45], "test": [2, 4, 12, 14, 15, 22, 23, 25, 26, 28, 29, 32, 33, 36, 37], "valid": [2, 13, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "dataset": [2, 8, 13, 14, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "lhots": [2, 12, 14, 16, 17, 18, 23, 25, 28], "readthedoc": [2, 13], "io": [2, 13, 15, 16, 17, 18, 19, 20, 21, 31, 43, 44, 45], "en": [2, 13, 16], "latest": [2, 11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "index": [2, 13, 15, 16, 17, 18, 19, 20, 21, 43, 44, 45], "yesno": [2, 8, 12, 13, 27, 39, 46], "veri": [2, 3, 7, 16, 17, 18, 25, 36, 37, 39, 44, 45], "good": [2, 7], "exampl": [2, 11, 12, 14, 16, 17, 18, 20, 21, 22, 29, 32, 36, 37, 39], "speech": [2, 11, 12, 13, 15, 24, 25, 39, 46], "pull": [2, 4, 6, 7, 16, 17, 18, 19, 23, 25, 28, 41], "380": [2, 16, 37], "show": [2, 4, 6, 7, 11, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "add": [2, 16, 17, 18, 23, 25, 26, 44, 46], "new": [2, 3, 11, 13, 16, 17, 18, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 39, 43, 44, 45], "suppos": [2, 44, 45], "would": [2, 14, 16, 17, 18, 28, 32, 44, 45], "name": [2, 8, 14, 16, 17, 18, 19, 23, 25, 31, 33, 34, 44, 45], "foo": [2, 21, 23, 28, 31, 33, 34, 43, 44, 45], "eg": [2, 8, 10, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "mkdir": [2, 16, 17, 23, 25, 26, 28, 32, 36, 37, 39], "p": [2, 4, 13, 16, 17, 25, 36, 37], "asr": [2, 4, 6, 7, 8, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45, 46], "touch": 2, "sh": [2, 13, 14, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "chmod": 2, "x": [2, 4, 18, 41], "simpl": [2, 13, 25], "own": [2, 29, 31, 44, 45], "otherwis": [2, 16, 17, 18, 23, 25, 28, 29, 31, 33, 34, 43, 44, 45], "librispeech": [2, 4, 6, 7, 8, 10, 12, 14, 16, 17, 18, 19, 20, 21, 27, 28, 29, 31, 32, 33, 34, 40, 41, 43, 44, 45, 46], "assum": [2, 4, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 29, 31, 32, 36, 37, 39, 43, 44, 45], "fanci": 2, "call": [2, 8, 19, 29], "bar": [2, 21, 23, 28, 31, 33, 34, 43, 44, 45], "organ": 2, "wai": [2, 3, 22, 31, 33, 34, 41, 43, 44, 45], "readm": [2, 23, 25, 26, 28, 32, 36, 37, 39], "md": [2, 10, 14, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "asr_datamodul": [2, 8, 13], "pretrain": [2, 4, 6, 7, 14, 16, 17, 18, 19, 21, 23, 25, 26, 28, 32, 36, 37, 39], "For": [2, 4, 6, 7, 8, 10, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "instanc": [2, 8, 10, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "tdnn": [2, 8, 13, 24, 27, 30, 35, 38], "its": [2, 4, 14, 15, 16, 17, 18, 21, 25, 33], "directori": [2, 12, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "structur": [2, 18], "descript": [2, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "contain": [2, 12, 14, 15, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45, 46], "inform": [2, 4, 6, 13, 14, 23, 25, 26, 28, 31, 32, 33, 36, 37, 39, 41, 43, 44, 45], "g": [2, 4, 5, 6, 7, 13, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "wer": [2, 13, 14, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "etc": [2, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "provid": [2, 11, 13, 14, 15, 16, 17, 18, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45, 46], "pytorch": [2, 8, 13, 16, 17, 18, 25], "dataload": [2, 13], "take": [2, 7, 14, 29, 31, 39, 44, 45], "input": [2, 14, 16, 17, 18, 23, 25, 26, 28, 32, 36, 37, 39, 41], "checkpoint": [2, 4, 6, 7, 13, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "save": [2, 13, 14, 17, 18, 20, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "dure": [2, 4, 5, 7, 8, 11, 14, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "stage": [2, 13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "": [2, 4, 6, 7, 13, 14, 16, 17, 18, 19, 20, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "definit": [2, 16, 17], "neural": [2, 4, 6, 7, 23, 28], "network": [2, 23, 25, 28, 31, 33, 34, 43, 44, 45], "script": [2, 6, 7, 12, 13, 21, 22, 23, 25, 26, 28, 29, 32, 36, 37, 39, 43], "infer": [2, 14, 16, 17], "tdnn_lstm_ctc": [2, 26, 32, 37], "conformer_ctc": [2, 23, 28], "get": [2, 11, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 39, 41, 43, 44, 45], "feel": [2, 29, 43], "result": [2, 4, 7, 10, 11, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "everi": [2, 14, 31, 33, 34, 43, 44, 45], "kept": [2, 31, 44, 45], "self": [2, 15, 18, 41], "toler": 2, "duplic": 2, "among": [2, 13], "differ": [2, 13, 16, 17, 18, 19, 23, 24, 28, 29, 31, 41, 43, 44, 45], "invoc": [2, 16, 17], "help": [2, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "blob": [2, 10, 13, 14, 21, 31, 33, 34, 43, 44, 45], "master": [2, 6, 10, 13, 14, 17, 18, 20, 21, 25, 29, 31, 33, 34, 43, 44, 45], "transform": [2, 6, 7, 23, 28, 43], "conform": [2, 20, 24, 25, 27, 30, 31, 33, 43, 44, 45], "base": [2, 4, 7, 18, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "lstm": [2, 15, 21, 22, 24, 27, 30, 35, 40, 42], "attent": [2, 18, 25, 26, 29, 41, 44, 45], "lm": [2, 4, 7, 12, 13, 25, 31, 32, 36, 37, 39, 44, 45], "rescor": [2, 12, 26, 32, 34, 36, 37, 39], "demonstr": [2, 9, 11, 14, 19], "consid": [2, 4, 18], "colab": [2, 13], "notebook": [2, 13], "welcom": 3, "There": [3, 4, 16, 17, 18, 19, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "mani": [3, 13, 44, 45], "two": [3, 4, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "them": [3, 5, 6, 9, 10, 11, 16, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "To": [3, 4, 6, 7, 11, 13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "document": [3, 12, 14, 15, 16, 17, 18, 19, 34], "repositori": [3, 16, 17, 18, 19], "recip": [3, 4, 6, 7, 10, 12, 13, 14, 19, 23, 25, 26, 28, 29, 31, 32, 36, 37, 39, 41, 43, 44, 45], "In": [3, 4, 6, 8, 11, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28, 29, 32, 36, 37, 39, 41], "page": [3, 11, 20, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45, 46], "describ": [3, 5, 9, 14, 16, 17, 19, 20, 21, 22, 23, 25, 26, 28, 31, 32, 36, 37, 44, 45], "how": [3, 4, 5, 6, 7, 9, 11, 12, 13, 16, 17, 18, 19, 22, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "creat": [3, 4, 6, 7, 12, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44], "data": [3, 4, 6, 7, 14, 16, 17, 18, 19, 20, 21, 24], "train": [3, 4, 6, 7, 8, 9, 11, 12, 14, 15, 20, 21, 22, 41], "decod": [3, 4, 8, 11, 12, 16, 17, 18, 21, 22], "model": [3, 4, 6, 7, 9, 11, 12, 13, 15, 29, 41], "As": [4, 5, 6, 7, 16, 25, 28, 29], "type": [4, 6, 7, 13, 14, 16, 17, 18, 23, 25, 28, 31, 33, 34, 39, 41, 43, 44, 45], "e2": [4, 7, 13], "usual": [4, 6, 7, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "an": [4, 5, 6, 7, 11, 13, 14, 16, 17, 18, 19, 20, 21, 23, 24, 25, 28, 29, 31, 34, 39, 43, 44, 45], "intern": [4, 5], "languag": [4, 7, 11, 12, 23, 25, 26], "learn": [4, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "level": [4, 5], "corpu": [4, 6, 7, 24], "real": 4, "life": 4, "scenario": 4, "often": [4, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "mismatch": [4, 44], "between": [4, 7, 31, 44, 45], "target": [4, 11, 13], "space": [4, 9, 12], "problem": [4, 6, 7, 13, 29], "when": [4, 6, 8, 11, 16, 17, 18, 22, 25, 28, 29, 31, 33, 34, 44, 45], "act": 4, "against": [4, 13], "extern": [4, 5, 6, 7], "tutori": [4, 6, 7, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 43, 44, 45], "low": [4, 16, 17], "order": [4, 13, 16, 17, 18, 23, 26, 28, 32, 36, 37], "densiti": 4, "ratio": 4, "allevi": 4, "effect": [4, 7, 18], "improv": [4, 5, 6, 7, 25], "perform": [4, 6, 7, 15, 25, 29, 44], "languga": 4, "integr": [4, 11], "pruned_transducer_stateless7_stream": [4, 6, 7, 18, 19, 45], "stream": [4, 6, 7, 12, 15, 16, 17, 19, 22, 23, 28, 36, 37, 43, 46], "howev": [4, 6, 7, 14, 17, 29], "easili": [4, 6, 7, 23, 26, 28], "appli": [4, 6, 7, 25, 41], "other": [4, 7, 14, 17, 18, 19, 25, 28, 29, 31, 32, 36, 37, 39, 41, 44, 45, 46], "encount": [4, 6, 7, 8, 13, 18, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "here": [4, 6, 7, 14, 16, 17, 18, 23, 25, 26, 28, 29, 32, 41, 44], "simplic": [4, 6, 7], "same": [4, 6, 7, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "domain": [4, 6, 7], "gigaspeech": [4, 6, 7, 10, 20, 43], "first": [4, 6, 8, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "let": [4, 6, 7, 13, 16, 17, 18, 23, 28], "background": 4, "predecessor": 4, "dr": 4, "propos": [4, 25, 41, 45], "address": [4, 11, 13, 14, 16, 17, 18, 25, 31, 34, 43, 44, 45], "sourc": [4, 13, 14, 16, 17, 18, 23, 24, 25, 28], "acoust": [4, 44, 45], "similar": [4, 5, 29, 33, 44, 45], "deriv": 4, "formular": 4, "bay": 4, "theorem": 4, "text": [4, 6, 7, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "score": [4, 5, 7, 23, 28, 31, 44, 45], "left": [4, 16, 18, 25, 44, 45], "y_u": 4, "mathit": 4, "y": 4, "right": [4, 16, 25, 41, 44], "log": [4, 8, 13, 16, 17, 18, 32, 36, 37, 39], "y_": 4, "u": [4, 13, 16, 17, 18, 23, 25, 26, 28, 29, 39], "lambda_1": 4, "p_": 4, "lambda_2": 4, "where": [4, 8, 44], "weight": [4, 23, 26, 28, 33, 34, 43], "respect": 4, "onli": [4, 6, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45, 46], "compar": [4, 16, 17, 18, 44], "shallow": [4, 12], "fusion": [4, 12], "subtract": [4, 5], "work": [4, 16, 17, 18, 28], "treat": [4, 17, 18], "predictor": 4, "joiner": [4, 16, 17, 18, 19, 21, 25, 31, 43, 44, 45], "weak": 4, "captur": 4, "therefor": [4, 8], "n": [4, 5, 6, 13, 23, 29, 31, 33, 34, 36, 37, 43, 44, 45], "gram": [4, 6, 13, 23, 25, 26, 31, 32, 34, 36, 37, 44, 45], "approxim": [4, 5], "ilm": 4, "lead": [4, 7], "formula": 4, "rnnt": [4, 31, 44, 45], "bi": [4, 6], "addit": 4, "estim": 4, "comar": 4, "li": 4, "choic": 4, "accord": 4, "origin": [4, 5], "paper": [4, 5, 29, 31, 43, 44, 45], "achiev": [4, 6, 7, 41], "both": [4, 31, 33, 34, 41, 43, 44, 45], "intra": 4, "cross": 4, "much": [4, 16, 17], "faster": [4, 6], "evalu": 4, "now": [4, 6, 13, 16, 17, 18, 23, 28, 29, 31, 32, 33, 34, 36, 37, 43, 44, 45], "illustr": [4, 6, 7], "purpos": [4, 6, 7, 16, 17], "from": [4, 6, 7, 8, 9, 11, 13, 14, 16, 17, 18, 19, 23, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "link": [4, 6, 7, 10, 13, 14, 15, 31, 33, 34, 43, 44, 45], "scratch": [4, 6, 7, 31, 33, 34, 43, 44, 45], "prune": [4, 6, 7, 14, 18, 19, 25, 27, 29, 30, 40, 41, 42, 43, 45], "statelessx": [4, 6, 7, 27, 29, 30, 40, 41, 42], "initi": [4, 6, 7, 23, 26], "step": [4, 6, 7, 13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44, 45], "download": [4, 6, 7, 8, 11, 12, 15, 22, 24, 29], "git_lfs_skip_smudg": [4, 6, 7, 16, 17, 18, 19], "huggingfac": [4, 6, 7, 10, 12, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 32, 33, 34, 36, 37, 39, 43], "co": [4, 6, 7, 10, 11, 13, 14, 16, 17, 18, 19, 23, 24, 25, 26, 28, 32, 33, 34, 36, 37, 39, 43], "zengwei": [4, 6, 7, 16, 18, 19, 34, 43], "stateless7": [4, 6, 7, 18, 19], "2022": [4, 6, 7, 14, 16, 17, 18, 19, 25, 31, 33, 34, 43, 44], "12": [4, 6, 7, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 31, 33, 34, 36, 39, 43, 44, 45], "29": [4, 6, 7, 13, 18, 19, 23, 25, 26, 28, 32, 33, 36, 37], "pushd": [4, 6, 7, 19], "exp": [4, 6, 7, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "lf": [4, 6, 7, 14, 16, 17, 18, 19, 23, 25, 26, 28, 32, 34, 36, 37, 39], "includ": [4, 6, 7, 16, 17, 18, 19, 31, 33, 34, 43, 44, 45], "pt": [4, 6, 7, 13, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "ln": [4, 6, 7, 14, 16, 17, 18, 19, 23, 28, 31, 33, 34, 43, 44, 45], "epoch": [4, 6, 7, 13, 14, 16, 17, 18, 19, 20, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "99": [4, 6, 7, 13, 16, 17, 18, 19], "symbol": [4, 5, 6, 7, 13, 25, 31, 44, 45], "load": [4, 6, 7, 13, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "done": [4, 6, 7, 13, 14, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "via": [4, 6, 7, 13, 15, 20, 21, 22], "exp_dir": [4, 6, 7, 13, 16, 17, 18, 25, 28, 29, 31, 33, 34, 44, 45], "avg": [4, 6, 7, 13, 14, 16, 17, 18, 19, 20, 21, 25, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "averag": [4, 6, 7, 13, 14, 16, 17, 18, 19, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "fals": [4, 6, 7, 13, 14, 16, 17, 18, 23, 25, 28, 29], "dir": [4, 6, 7, 14, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "bpe": [4, 5, 6, 7, 14, 16, 17, 18, 19, 20, 21, 28, 31, 33, 34, 43, 44, 45], "lang_bpe_500": [4, 6, 7, 14, 16, 17, 18, 19, 20, 21, 28, 31, 33, 34, 43, 44, 45], "max": [4, 6, 7, 13, 14, 16, 17, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "durat": [4, 6, 7, 14, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "600": [4, 6, 7, 13, 14, 28, 31, 33, 43, 44, 45], "chunk": [4, 6, 7, 16, 18, 19, 44, 45], "len": [4, 6, 7, 18, 19, 45], "32": [4, 6, 7, 13, 16, 17, 18, 19, 23, 25, 26, 45], "method": [4, 7, 11, 14, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 43, 44, 45], "modified_beam_search": [4, 5, 6, 7, 11, 25, 29, 31, 33, 43, 44, 45], "clean": [4, 13, 18, 23, 25, 28, 29, 31, 32, 33, 34, 43, 44, 45], "beam_size_4": [4, 6, 7], "11": [4, 6, 7, 8, 13, 16, 17, 19, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "best": [4, 5, 6, 7, 16, 17, 18, 23, 26, 28], "7": [4, 6, 7, 13, 14, 15, 18, 22, 23, 26, 28, 31, 32, 36, 37, 43, 44], "93": [4, 6, 7], "Then": [4, 6], "necessari": [4, 29], "note": [4, 5, 6, 7, 8, 14, 16, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "960": [4, 28, 31, 33, 34, 43, 44, 45], "hour": [4, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "ezerhouni": [4, 6, 7], "popd": [4, 6, 7, 19], "marcoyang": [4, 6], "librispeech_bigram": [4, 6], "2gram": [4, 6], "fst": [4, 13, 25, 39], "modified_beam_search_lm_lodr": 4, "lm_dir": [4, 6, 7, 13, 28], "lm_scale": [4, 6, 7], "42": [4, 13, 17, 23, 28, 39], "lodr_scal": 4, "24": [4, 8, 13, 16, 17, 26, 32, 36, 37, 39], "scale": [4, 6, 7, 16, 17, 23, 28, 29, 32, 34, 36, 37], "embed": [4, 6, 7, 25, 31, 43, 44, 45], "dim": [4, 6, 7, 16, 17, 18, 25, 31, 44], "2048": [4, 6, 7, 14, 16, 17, 18, 25], "hidden": [4, 6, 7, 17, 43], "num": [4, 6, 7, 16, 17, 18, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "layer": [4, 6, 7, 16, 17, 18, 25, 29, 31, 41, 43, 44, 45], "vocab": [4, 6, 7, 28], "500": [4, 6, 7, 14, 16, 17, 18, 25, 28, 34, 43], "token": [4, 16, 17, 18, 19, 23, 25, 26, 28, 32, 36, 37, 39], "ngram": [4, 28, 32, 36, 37], "2": [4, 6, 7, 12, 14, 15, 22, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "extra": [4, 16, 17, 18, 25, 41, 44], "argument": [4, 7, 29, 41], "need": [4, 6, 11, 13, 14, 15, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "given": [4, 13, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 44, 45], "specifi": [4, 7, 8, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "neg": [4, 25], "number": [4, 7, 11, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "obtain": [4, 7, 23, 25, 26, 28, 32, 36, 37], "shown": [4, 7], "below": [4, 7, 13, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44], "61": [4, 6], "6": [4, 6, 7, 8, 15, 22, 23, 25, 28, 31, 32, 36, 37, 43], "74": [4, 6, 13, 14], "recal": 4, "lowest": [4, 31, 33, 34, 43, 44, 45], "77": [4, 6, 7, 13, 28], "08": [4, 6, 7, 18, 28, 32, 34, 36, 37, 39, 43], "inde": 4, "even": [4, 11, 13, 17], "better": [4, 6], "increas": [4, 6, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "8": [4, 6, 7, 8, 13, 14, 16, 17, 18, 23, 25, 28, 29, 31, 32, 33, 34, 39, 43, 44, 45], "45": [4, 6, 13, 16, 18, 23, 25, 28], "38": [4, 6, 13, 16, 23, 25, 28, 36], "23": [4, 6, 8, 13, 16, 17, 18, 23, 25, 26, 28, 36, 37, 39], "section": [5, 8, 9, 13, 14, 19, 20, 21, 22, 23, 28], "langugag": 5, "transduc": [5, 12, 14, 15, 19, 22, 24, 27, 29, 30, 40, 41, 42], "avail": [5, 6, 13, 14, 16, 17, 18, 23, 25, 28, 32, 36, 37, 39, 43], "beam": [5, 14, 43], "search": [5, 6, 7, 10, 11], "realli": [5, 23, 26, 28, 31, 33, 34, 43, 44, 45], "valu": [5, 7, 16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "rnn": [5, 6, 7, 12, 17, 25, 31, 33, 43, 44, 45], "t": [5, 13, 16, 17, 18, 19, 20, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "doe": [5, 16, 17, 18, 23, 25, 28, 39], "modified_beam_search_lm_shallow_fus": [5, 6, 7], "interpol": 5, "also": [5, 6, 7, 9, 10, 13, 14, 15, 16, 17, 18, 19, 21, 23, 25, 26, 28, 31, 33, 34, 39, 41, 43, 44, 45], "known": 5, "modified_beam_search_lodr": [5, 6], "bigram": 5, "backoff": 5, "modified_beam_search_lm_rescor": [5, 6], "hypothes": [5, 6], "rnnlm": [5, 6], "re": [5, 6, 8, 23, 26, 28, 29, 31, 33, 34, 41, 43, 44, 45], "rank": [5, 6], "modified_beam_search_lm_rescore_lodr": [5, 6], "lodr": [5, 12], "commonli": [6, 7, 23, 25, 26, 28, 32, 36, 37, 39], "approach": 6, "incorpor": 6, "unlik": 6, "more": [6, 13, 16, 17, 18, 23, 28, 29, 39, 41, 43, 44], "effici": [6, 7, 31, 44, 45], "than": [6, 13, 14, 17, 23, 25, 26, 28, 31, 32, 33, 34, 39, 43, 44, 45], "sinc": [6, 13, 16, 17, 18, 29, 39, 43], "less": [6, 14, 28, 32, 39, 44, 45], "comput": [6, 13, 14, 16, 17, 18, 23, 25, 26, 29, 31, 32, 34, 36, 37, 39, 43, 44, 45], "gpu": [6, 7, 13, 16, 17, 23, 25, 26, 28, 29, 31, 33, 34, 36, 37, 39, 43, 44, 45], "try": [6, 8, 9, 11, 29, 31, 33, 34, 43, 44, 45], "might": [6, 7, 17, 18, 44, 45], "ideal": [6, 7], "mai": [6, 7, 13, 16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45, 46], "With": [6, 13], "43": [6, 17, 18, 28], "great": 6, "made": [6, 16], "boost": [6, 7], "tabl": [6, 11, 16, 17, 18], "67": [6, 13], "59": [6, 13, 16, 26, 28], "86": 6, "fact": 6, "arpa": [6, 39], "performn": 6, "depend": [6, 13, 23, 28], "kenlm": 6, "kpu": 6, "archiv": 6, "zip": 6, "execut": [6, 7, 16, 23, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "9": [6, 13, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 39, 43, 44, 45], "57": [6, 13, 17, 28, 32], "slightli": 6, "63": [6, 25], "04": [6, 16, 17, 18, 23, 25, 26, 28, 32, 36, 37], "52": [6, 13, 23, 28], "73": 6, "mention": 6, "earlier": 6, "benchmark": [6, 25], "speed": [6, 16, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "132": 6, "95": [6, 24], "177": [6, 13, 14, 17, 18, 25, 26, 28], "96": [6, 13], "210": [6, 36, 37], "262": [6, 7], "62": [6, 7, 13, 28, 32], "65": [6, 7, 13, 16], "352": [6, 7, 28], "58": [6, 7, 8, 13, 28], "488": [6, 7, 16, 17, 18], "400": [6, 24], "610": 6, "870": 6, "156": 6, "203": [6, 14, 28], "255": [6, 17, 18], "160": 6, "263": [6, 13, 17], "singl": [6, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "32g": 6, "v100": [6, 23, 25, 26, 28], "vari": 6, "word": [7, 23, 25, 26, 28, 32, 36, 37, 39], "error": [7, 8, 13, 16, 17, 18, 28], "rate": [7, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "These": [7, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "alreadi": [7, 13, 14], "But": [7, 16, 31, 33, 34, 43, 44, 45], "long": [7, 16], "true": [7, 13, 14, 16, 17, 18, 23, 25, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "either": [7, 11, 23, 25, 26, 28, 44, 45], "choos": [7, 11, 13, 29, 31, 33, 34, 43, 44, 45], "three": [7, 16, 17, 18, 21, 23, 25, 41], "associ": 7, "dimens": [7, 31, 44, 45], "obviou": 7, "rel": 7, "reduct": [7, 13, 16, 17, 33], "around": 7, "A": [7, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 43, 44, 45], "few": [7, 16, 17, 18, 29], "paramet": [7, 14, 16, 17, 18, 20, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 43, 44, 45], "tune": [7, 16, 17, 18, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "control": [7, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "too": 7, "small": [7, 25, 36, 37, 39], "fulli": 7, "util": [7, 8, 13, 28], "larg": 7, "domin": 7, "bad": 7, "typic": [7, 23, 25, 26, 28], "activ": [7, 11, 13], "path": [7, 11, 13, 14, 16, 17, 18, 21, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "trade": 7, "off": [7, 16], "accuraci": [7, 16, 17, 24], "larger": [7, 17, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "slower": 7, "collect": [8, 13], "user": 8, "post": 8, "correspond": [8, 10, 11], "solut": 8, "One": 8, "torch": [8, 12, 14, 15, 22, 23, 25, 28], "torchaudio": [8, 12, 41], "cu111": 8, "torchvis": 8, "f": [8, 13, 36, 37], "org": [8, 13, 24, 25, 31, 43, 44, 45], "whl": [8, 13], "torch_stabl": [8, 13], "throw": [8, 16, 17, 18], "cuda": [8, 12, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 43, 44, 45], "while": [8, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "That": [8, 16, 17, 29, 31, 43, 44, 45], "cu11": 8, "correct": 8, "traceback": 8, "most": [8, 44, 45], "recent": [8, 16, 17, 18], "last": 8, "line": [8, 16, 17, 18, 31, 44, 45], "14": [8, 13, 14, 16, 17, 20, 23, 28, 31, 32, 33, 36, 43, 44, 45], "yesnoasrdatamodul": 8, "home": [8, 16, 17, 23, 28], "xxx": [8, 14, 16, 17, 18], "next": [8, 11, 13, 16, 17, 18, 28, 29, 31, 32, 33, 34, 43, 44, 45], "gen": [8, 11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "kaldi": [8, 11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "34": [8, 16, 17], "datamodul": 8, "__init__": [8, 13, 14, 16, 17, 18, 23, 25, 28], "add_eo": 8, "add_so": 8, "get_text": 8, "39": [8, 13, 16, 18, 25, 28, 32, 36], "tensorboard": [8, 13, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "summarywrit": 8, "miniconda3": 8, "env": 8, "yyi": 8, "lib": [8, 13, 18], "site": [8, 13, 18], "packag": [8, 13, 18], "loosevers": 8, "uninstal": 8, "setuptool": [8, 13], "conda": 8, "dev": [8, 13, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "yangyifan": 8, "anaconda3": 8, "dev20230112": 8, "cuda11": [8, 13], "torch1": [8, 13], "13": [8, 13, 14, 16, 17, 18, 25, 26, 28, 32, 33, 36], "py3": [8, 13], "linux": [8, 11, 13, 15, 16, 17, 18, 19], "x86_64": [8, 13, 16], "egg": 8, "_k2": [8, 13], "determinizeweightpushingtyp": 8, "handl": [8, 23, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "except": [8, 14], "anoth": 8, "occur": 8, "pruned_transducer_stateless7_ctc_b": [8, 33], "104": [8, 13], "30": [8, 13, 16, 17, 18, 23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44, 45], "rais": 8, "anaconda": 8, "maco": [8, 11, 15, 16, 17, 18, 19], "probabl": [8, 25, 31, 33, 43, 44, 45], "variabl": [8, 13, 16, 17, 18, 23, 26, 28, 29, 31, 33, 34, 43, 44, 45], "export": [8, 12, 13, 23, 25, 26, 28, 29, 32, 36, 37, 39], "dyld_library_path": 8, "conda_prefix": 8, "find": [8, 9, 10, 11, 14, 16, 17, 18, 21, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "locat": [8, 16], "libpython": 8, "abl": 8, "insid": [8, 21], "codna_prefix": 8, "ld_library_path": 8, "within": [9, 11, 16, 17], "anyth": [9, 11], "youtub": [9, 12, 28, 29, 31, 32, 33, 34, 43, 44, 45], "video": [9, 12, 28, 29, 31, 32, 33, 34, 43, 44, 45], "upload": [10, 11, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "visit": [10, 11, 31, 33, 34, 43, 44, 45], "specif": [10, 19, 25], "aishel": [10, 12, 23, 25, 26, 27, 46], "wenetspeech": [10, 20], "framework": [11, 31, 44], "sherpa": [11, 15, 20, 21, 22, 43], "window": [11, 15, 16, 17, 18, 19], "ipad": 11, "phone": 11, "start": [11, 13, 14, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "recognit": [11, 12, 15, 16, 17, 24, 25, 39, 46], "screenshot": [11, 23, 25, 26, 28, 29, 31, 39, 43, 44], "select": [11, 13, 16, 17, 18, 31, 32, 36, 37, 39, 43, 44, 45], "current": [11, 16, 17, 25, 29, 41, 43, 44, 45, 46], "chines": [11, 24, 25], "english": [11, 39, 43], "greedi": 11, "record": [11, 17, 18, 23, 24, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "click": [11, 13, 23, 25, 26, 28, 31, 33, 34, 39, 43, 44], "button": 11, "submit": 11, "wait": 11, "moment": 11, "bottom": [11, 31, 33, 34, 43, 44, 45], "part": [11, 13, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "one": [11, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "subscrib": [11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "channel": [11, 13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "nadira": [11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "povei": [11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "www": [11, 13, 24, 28, 29, 31, 32, 33, 34, 43, 44, 45], "uc_vaumpkminz1pnkfxan9mw": [11, 13, 28, 29, 31, 32, 33, 34, 43, 44, 45], "toolkit": 12, "cudnn": 12, "frequent": 12, "ask": 12, "question": 12, "faq": 12, "oserror": 12, "libtorch_hip": 12, "cannot": [12, 16, 17, 18], "share": [12, 13], "object": [12, 13, 23, 25, 26, 31, 39, 43, 44], "attributeerror": 12, "modul": [12, 16, 18, 33, 44], "distutil": 12, "attribut": [12, 18, 28], "importerror": 12, "libpython3": 12, "No": [12, 16, 17, 18, 39], "state_dict": [12, 22, 23, 25, 26, 28, 32, 36, 37, 39], "jit": [12, 15, 22, 28], "trace": [12, 15, 20, 22], "onnx": [12, 14, 22], "ncnn": [12, 22], "non": [12, 28, 41, 44, 46], "timit": [12, 27, 36, 37, 46], "introduct": [12, 40, 46], "contribut": 12, "guid": 13, "setup": [13, 16, 23, 25, 26, 28, 29, 31, 32, 36, 37, 39, 44, 45], "matter": [13, 16], "don": [13, 16, 17, 18, 20, 23, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "suggest": [13, 31, 33, 34, 43, 44, 45], "compil": [13, 16, 17, 23, 25, 28], "wheel": [13, 16], "from_wheel": 13, "alwai": [13, 14], "strongli": 13, "pythonpath": [13, 16, 17, 18], "point": [13, 14, 23, 26, 28, 29, 31, 33, 34, 43, 44, 45], "folder": [13, 14, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "tmp": [13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "put": [13, 16, 17, 33, 44], "sever": [13, 14, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "switch": [13, 23, 28, 34], "just": [13, 16, 17, 18, 41], "about": [13, 16, 17, 18, 25, 29, 31, 34, 43, 44, 45], "kuangfangjun": [13, 16, 17, 18], "virtualenv": 13, "cpython3": 13, "final": [13, 14, 16, 17, 28, 32], "64": [13, 14, 16, 25, 44], "9422m": 13, "creator": 13, "cpython3posix": 13, "dest": 13, "star": [13, 16, 17, 18], "fj": [13, 14, 16, 17, 18, 25, 28], "fangjun": [13, 14, 16, 17, 18, 25, 28], "clear": 13, "no_vcs_ignor": 13, "global": 13, "seeder": 13, "fromappdata": 13, "bundl": 13, "copi": [13, 41], "app_data_dir": 13, "ad": [13, 16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 39, 41, 43, 44, 45], "seed": 13, "bashactiv": 13, "cshellactiv": 13, "fishactiv": 13, "nushellactiv": 13, "powershellactiv": 13, "pythonactiv": 13, "bin": [13, 16, 17, 18, 23, 28], "determin": 13, "nvidia": [13, 23, 25, 26, 28], "smi": 13, "head": [13, 25, 41], "wed": [13, 16, 23, 25, 28], "jul": 13, "26": [13, 16, 17, 18, 25, 28, 37], "21": [13, 14, 16, 23, 25, 28, 36, 37], "49": [13, 16, 17, 28, 37, 39], "2023": [13, 16, 17, 18, 33], "510": 13, "47": [13, 16, 17, 18, 23, 28], "03": [13, 14, 17, 25, 28, 36, 37, 43], "driver": 13, "greater": 13, "our": [13, 16, 17, 18, 20, 21, 28, 29, 31, 41, 44, 45], "case": [13, 14, 16, 17, 18, 31, 33, 34, 43, 44, 45], "verifi": 13, "nvcc": 13, "copyright": 13, "c": [13, 25, 26, 31, 33, 34, 39, 43, 44, 45], "2005": 13, "2019": 13, "corpor": 13, "built": 13, "wed_oct_23_19": 13, "38_pdt_2019": 13, "releas": [13, 14, 16, 17, 18, 23, 25, 28], "v10": 13, "89": [13, 23], "cu116": 13, "compat": 13, "audio": [13, 36, 37], "stabl": 13, "matrix": 13, "appropri": 13, "2bcu116": 13, "cp38": 13, "linux_x86_64": 13, "1983": 13, "mb": [13, 16, 17, 18], "________________________________________": 13, "gb": [13, 25], "764": 13, "kb": [13, 16, 17, 18, 36, 37], "eta": 13, "00": [13, 16, 23, 25, 26, 28, 32, 36, 37, 39], "satisfi": 13, "extens": 13, "successfulli": [13, 16, 17, 18], "__version__": 13, "dev20230725": 13, "pypi": 13, "tuna": 13, "tsinghua": 13, "edu": 13, "cn": 13, "csukuangfj": [13, 14, 16, 17, 19, 23, 25, 26, 28, 32, 36, 37, 39, 43], "resolv": 13, "main": [13, 23, 28, 41], "ubuntu": [13, 16, 17, 18], "2bcuda11": 13, "manylinux_2_17_x86_64": 13, "manylinux2014_x86_64": 13, "graphviz": 13, "cach": [13, 18], "de": [13, 14, 16, 17, 18, 25], "5e": 13, "fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa": 13, "20": [13, 14, 16, 18, 23, 25, 26, 28, 31, 32, 36, 37, 39, 44], "none": [13, 23, 28], "sha1": [13, 14, 16, 17, 18, 23, 25, 28], "4c05309499a08454997adf500b56dcc629e35ae5": 13, "date": [13, 14, 16, 17, 18, 23, 25, 28], "tue": [13, 16, 28], "25": [13, 14, 16, 17, 23, 28, 31, 36, 37, 39, 44], "16": [13, 14, 16, 17, 18, 21, 23, 25, 26, 28, 31, 32, 36, 37, 39, 43, 44, 45], "36": [13, 16, 25, 28, 29], "o": 13, "cento": 13, "2009": 13, "core": 13, "cmake": [13, 16, 17, 23, 28], "27": [13, 16, 17, 18, 23, 25, 32, 37], "gcc": 13, "cmake_cuda_flag": 13, "wno": 13, "deprec": [13, 25], "lineinfo": 13, "expt": 13, "extend": 13, "lambda": 13, "use_fast_math": 13, "xptxa": 13, "w": [13, 28, 36, 37], "gencod": 13, "arch": 13, "compute_35": 13, "sm_35": 13, "compute_50": 13, "sm_50": 13, "compute_60": 13, "sm_60": 13, "compute_61": 13, "sm_61": 13, "compute_70": 13, "sm_70": 13, "compute_75": 13, "sm_75": 13, "compute_80": 13, "sm_80": 13, "compute_86": 13, "sm_86": 13, "donnx_namespac": 13, "onnx_c2": 13, "compute_52": 13, "sm_52": 13, "xcudaf": 13, "diag_suppress": 13, "cc_clobber_ignor": 13, "integer_sign_chang": 13, "useless_using_declar": 13, "set_but_not_us": 13, "field_without_dll_interfac": 13, "base_class_has_different_dll_interfac": 13, "dll_interface_conflict_none_assum": 13, "dll_interface_conflict_dllexport_assum": 13, "implicit_return_from_non_void_funct": 13, "unsigned_compare_with_zero": 13, "declared_but_not_referenc": 13, "bad_friend_decl": 13, "relax": 13, "constexpr": 13, "d_glibcxx_use_cxx11_abi": 13, "option": [13, 15, 19, 22, 25, 29, 32, 36, 37, 39], "wall": 13, "strict": [13, 18, 24], "overflow": 13, "unknown": 13, "pragma": 13, "cmake_cxx_flag": 13, "unus": 13, "nvtx": 13, "enabl": [13, 29], "disabl": [13, 14, 16, 17], "debug": 13, "sync": 13, "kernel": [13, 16, 18, 25], "cpu": [13, 14, 16, 17, 18, 20, 23, 31, 33, 34, 39, 44, 45], "memori": [13, 16, 23, 25, 28, 41], "alloc": 13, "214748364800": 13, "byte": [13, 16, 17, 18], "200": [13, 14, 16, 17, 18, 23, 28, 29, 36, 37, 39], "abort": 13, "__file__": 13, "cpython": [13, 16], "gnu": [13, 16], "req": 13, "vq12fd5i": 13, "filter": 13, "quiet": [13, 24], "7640d663469b22cd0b36f3246ee9b849cd25e3b7": 13, "metadata": [13, 36, 37], "pyproject": 13, "toml": 13, "cytoolz": 13, "1e": 13, "3b": 13, "a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f": 13, "33": [13, 16, 17, 23, 24, 25, 28, 36], "pyyaml": 13, "c8": 13, "6b": 13, "6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b": 13, "ma": 13, "nylinux_2_17_x86_64": 13, "736": 13, "dataclass": 13, "2f": 13, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 13, "dev0": 13, "7640d66": 13, "lilcom": 13, "a8": 13, "df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4": 13, "linux_2_17_x86_64": 13, "87": [13, 16], "tqdm": 13, "e6": 13, "02": [13, 14, 16, 17, 18, 25, 28, 31, 37, 43, 44], "a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97": 13, "numpi": 13, "18": [13, 16, 17, 18, 23, 25, 26, 28, 31, 32, 36, 37, 43, 44, 45], "audioread": 13, "5d": 13, "cb": 13, "82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923": 13, "tar": 13, "gz": 13, "377": 13, "tabul": 13, "40": [13, 16, 17, 18, 26, 28, 32, 36, 37], "44": [13, 16, 17, 28, 36, 37], "4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854": 13, "35": [13, 14, 16, 17, 18, 25, 28, 43], "1a": 13, "70": 13, "e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f": 13, "97": [13, 16, 23], "ab": [13, 31, 43, 44, 45], "c3": 13, "57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121": 13, "48": [13, 16, 17, 23, 25], "intervaltre": 13, "50": [13, 14, 16, 17, 18, 28, 31, 36, 43, 44, 45], "fb": 13, "396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb": 13, "soundfil": 13, "bd": 13, "0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c": 13, "py2": 13, "46": [13, 17, 23, 28], "toolz": 13, "7f": 13, "5c": 13, "922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9": 13, "55": [13, 16, 26, 28, 36], "sortedcontain": 13, "9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621c": 13, "cffi": 13, "b7": 13, "8b": 13, "06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79": 13, "15": [13, 14, 16, 17, 18, 25, 26, 28, 36, 39], "442": 13, "pycpars": 13, "d5": 13, "5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53": 13, "118": [13, 28], "filenam": [13, 16, 17, 18, 19, 20, 21, 33, 34, 43, 45], "size": [13, 14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "687627": 13, "sha256": 13, "cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a": 13, "store": [13, 28], "ephem": 13, "wwtk90_m": 13, "7a": 13, "8e": 13, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 13, "23704": 13, "5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2": 13, "9c": 13, "f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39": 13, "26098": 13, "2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9": 13, "f3": 13, "ed": 13, "2b": 13, "c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271": 13, "remot": 13, "enumer": 13, "12942": 13, "count": 13, "100": [13, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "compress": 13, "56": [13, 16, 17, 28, 36], "total": [13, 17, 18, 23, 25, 26, 28, 29, 31, 32, 39, 43, 44], "delta": 13, "17": [13, 14, 16, 17, 18, 23, 28, 36, 37, 43], "reus": 13, "pack": [13, 44, 45], "12875": 13, "receiv": 13, "mib": 13, "8835": 13, "07": [13, 16, 17, 18, 23, 25, 26, 28], "41": [13, 16, 18, 23, 25, 36, 39], "dl_dir": [13, 23, 26, 28, 29, 31, 33, 34, 43, 44, 45], "waves_yesno": 13, "___________________________________________________": 13, "70m": 13, "1mb": 13, "manifest": [13, 29], "fbank": [13, 14, 16, 17, 18, 23, 25, 26, 28, 32, 36, 37, 39], "718": 13, "info": [13, 14, 16, 17, 18, 23, 25, 26, 28, 32, 36, 37, 39], "compute_fbank_yesno": 13, "process": [13, 14, 16, 17, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "extract": [13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "featur": [13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "_______________________________________________________________________________": 13, "90": [13, 16], "01": [13, 16, 25, 26, 28, 29, 33], "82it": 13, "778": 13, "______________________________________________________________________________": 13, "256": [13, 18, 36, 37], "92it": 13, "51": [13, 16, 23, 28, 39], "lang": [13, 14, 25, 28, 34], "66": [13, 17], "project": 13, "kaldilm": 13, "csrc": [13, 28], "arpa_file_pars": 13, "cc": 13, "void": 13, "arpafilepars": 13, "std": 13, "istream": 13, "79": 13, "140": [13, 26], "92": [13, 28], "hlg": [13, 32, 36, 37, 39], "275": [13, 23], "compile_hlg": 13, "124": [13, 23, 28], "lang_phon": [13, 26, 32, 36, 37, 39], "276": 13, "lexicon": [13, 23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44, 45], "171": [13, 26, 28, 36, 37], "convert": [13, 16, 17, 18, 28], "l": [13, 16, 17, 18, 25, 36, 37, 39], "linv": [13, 25, 28, 39], "309": 13, "ctc_topo": 13, "max_token_id": 13, "310": 13, "314": 13, "intersect": [13, 31, 44, 45], "323": 13, "lg": [13, 31, 34, 44, 45], "shape": [13, 18], "connect": [13, 14, 28, 31, 32, 43, 44, 45], "68": [13, 28], "class": [13, 28], "tensor": [13, 17, 18, 23, 25, 26, 28, 31, 39, 43, 44], "71": [13, 28, 32], "341": 13, "rag": 13, "raggedtensor": 13, "76": [13, 39], "remov": [13, 23, 25, 26, 28, 32, 36, 37], "disambigu": 13, "354": 13, "91": 13, "remove_epsilon": 13, "445": 13, "arc": 13, "compos": 13, "h": 13, "446": 13, "106": [13, 17, 28], "109": [13, 23, 28], "447": 13, "111": [13, 28], "127": [13, 16, 17, 39], "cuda_visible_devic": [13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "segment": 13, "fault": 13, "dump": 13, "protocol_buffers_python_implement": 13, "674": 13, "interest": [13, 29, 31, 33, 34, 43, 44, 45], "936": 13, "481": 13, "482": 13, "posixpath": [13, 16, 17, 18, 25, 28], "lang_dir": [13, 25, 28], "lr": [13, 25, 43], "feature_dim": [13, 14, 16, 17, 18, 23, 25, 28, 39], "weight_decai": 13, "06": [13, 14, 16, 26, 28, 32, 39], "start_epoch": 13, "best_train_loss": [13, 14, 16, 17, 18], "inf": [13, 14, 16, 17, 18], "best_valid_loss": [13, 14, 16, 17, 18], "best_train_epoch": [13, 14, 16, 17, 18], "best_valid_epoch": [13, 14, 17, 18], "batch_idx_train": [13, 14, 16, 17, 18], "log_interv": [13, 14, 16, 17, 18], "reset_interv": [13, 14, 16, 17, 18], "valid_interv": [13, 14, 16, 17, 18], "beam_siz": [13, 14, 25], "sum": 13, "use_double_scor": [13, 23, 28, 39], "world_siz": [13, 29], "master_port": 13, "12354": 13, "num_epoch": 13, "feature_dir": [13, 28], "max_dur": [13, 28], "bucketing_sampl": [13, 28], "num_bucket": [13, 28], "concatenate_cut": [13, 28], "duration_factor": [13, 28], "gap": [13, 28], "on_the_fly_feat": [13, 28], "shuffl": [13, 28], "return_cut": [13, 28], "num_work": [13, 28], "env_info": [13, 14, 16, 17, 18, 23, 25, 28], "branch": [13, 14, 16, 17, 18, 23, 25, 28, 33], "3fb0a43": 13, "thu": [13, 14, 16, 17, 18, 25, 28, 32], "05": [13, 14, 16, 17, 23, 25, 26, 28, 37], "hostnam": [13, 14, 16, 17, 18, 25], "74279": [13, 14, 16, 17, 18, 25], "1220091118": 13, "57c4d55446": 13, "sph26": 13, "ip": [13, 14, 16, 17, 18, 25], "941": 13, "168": [13, 32], "949": 13, "495": 13, "devic": [13, 14, 16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 44, 45], "965": [13, 23], "146": 13, "cut": [13, 28], "244": 13, "967": 13, "149": [13, 16, 28], "199": [13, 28, 32], "singlecutsampl": 13, "205": [13, 28], "968": 13, "218": [13, 17], "252": 13, "565": [13, 28], "422": 13, "batch": [13, 16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "loss": [13, 16, 17, 23, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "065": 13, "over": [13, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "2436": 13, "frame": [13, 25, 31, 33, 44, 45], "tot_loss": 13, "53": [13, 18, 23, 31, 32, 37, 43, 44], "681": [13, 16], "4561": 13, "2828": 13, "7076": 13, "22192": 13, "54": [13, 17, 18, 28, 32, 36, 37], "167": 13, "444": 13, "9002": 13, "18067": 13, "011": 13, "2555": 13, "2695": 13, "484": 13, "34971": 13, "331": [13, 16, 17, 28, 32], "4688": 13, "368": 13, "75": [13, 16], "633": 13, "2532": 13, "242": [13, 23, 28], "1139": 13, "1592": 13, "522": [13, 28], "1627": 13, "209": [13, 32], "07055": 13, "1175": 13, "07091": 13, "640": [13, 18], "847": 13, "07731": 13, "427": [13, 17, 28], "04391": 13, "05341": 13, "884": 13, "04384": 13, "387": [13, 37], "03458": 13, "04616": 13, "707": [13, 23, 28], "03379": 13, "758": [13, 28], "433": [13, 28], "01054": 13, "980": [13, 28], "009014": 13, "009974": 13, "489": [13, 23], "01085": 13, "258": [13, 36, 37], "01172": 13, "01055": 13, "621": [13, 39], "01074": 13, "699": 13, "866": 13, "01044": 13, "844": 13, "008942": 13, "221": [13, 28], "01082": 13, "970": [13, 28], "01169": 13, "28": [13, 16, 17, 25, 28, 32], "247": 13, "01073": 13, "326": [13, 17], "555": 13, "840": 13, "264": [13, 18], "search_beam": [13, 23, 28, 39], "output_beam": [13, 23, 28, 39], "min_active_st": [13, 23, 28, 39], "max_active_st": [13, 23, 28, 39], "10000": [13, 23, 28, 39], "841": 13, "855": 13, "273": [13, 14, 25], "868": 13, "291": 13, "882": 13, "883": 13, "157": 13, "204": [13, 18, 28], "until": [13, 28, 33], "701": 13, "241": [13, 23], "transcript": [13, 23, 24, 25, 26, 28, 31, 32, 36, 37, 43, 44, 45], "recog": [13, 25, 28], "test_set": [13, 39], "702": [13, 28], "564": 13, "240": [13, 23, 39], "ins": [13, 28, 39], "del": [13, 28, 39], "sub": [13, 28, 39], "704": [13, 23, 36], "249": [13, 17], "wrote": [13, 28], "detail": [13, 15, 19, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "stat": [13, 28], "err": [13, 25, 28], "316": [13, 28], "congratul": [13, 16, 17, 18, 23, 26, 28, 32, 36, 37, 39], "fun": [13, 16, 17], "variou": [13, 19, 22, 46], "period": [14, 16], "disk": 14, "optim": [14, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "relat": [14, 23, 25, 28, 32, 36, 37, 39], "resum": [14, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "strip": 14, "reduc": [14, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "each": [14, 16, 17, 19, 23, 25, 26, 28, 31, 33, 34, 41, 43, 44, 45], "well": [14, 39, 46], "usag": [14, 16, 17, 18, 20, 21, 32, 36, 37, 39], "pruned_transducer_stateless3": [14, 20, 41], "almost": [14, 31, 41, 44, 45], "dict": [14, 18], "host": 14, "stateless3": [14, 16], "repo": [14, 19], "prefix": 14, "those": 14, "wave": [14, 16, 17, 18, 23, 28], "iter": [14, 16, 17, 18, 21, 31, 33, 34, 43, 44, 45], "1224000": 14, "greedy_search": [14, 25, 31, 33, 43, 44, 45], "test_wav": [14, 16, 17, 18, 19, 23, 25, 26, 28, 32, 36, 37, 39], "1089": [14, 16, 17, 18, 19, 28, 32], "134686": [14, 16, 17, 18, 19, 28, 32], "0001": [14, 16, 17, 18, 19, 28, 32], "wav": [14, 16, 17, 18, 19, 21, 23, 25, 26, 28, 31, 33, 34, 36, 37, 39, 43, 44, 45], "1221": [14, 16, 17, 28, 32], "135766": [14, 16, 17, 28, 32], "0002": [14, 16, 17, 28, 32], "multipl": [14, 23, 25, 26, 28, 32, 36, 37, 39], "sound": [14, 16, 17, 18, 21, 22, 23, 25, 26, 28, 32, 36, 37, 39], "Its": [14, 16, 17, 18, 28], "output": [14, 16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "19": [14, 16, 17, 18, 23, 28, 32, 36, 37], "09": [14, 17, 23, 25, 26, 28, 43], "233": [14, 16, 17], "265": 14, "3000": [14, 16, 17, 18], "80": [14, 16, 17, 18, 23, 25, 28], "subsampling_factor": [14, 17, 18, 23, 25, 28], "encoder_dim": [14, 16, 17, 18], "512": [14, 16, 17, 18, 23, 25, 28], "nhead": [14, 16, 18, 23, 25, 28, 31, 44], "dim_feedforward": [14, 16, 17, 25], "num_encoder_lay": [14, 16, 17, 18, 25], "decoder_dim": [14, 16, 17, 18], "joiner_dim": [14, 16, 17, 18], "model_warm_step": [14, 16, 17], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 14, "fri": 14, "oct": [14, 28], "miss": [14, 16, 17, 18, 25, 28], "cu102": [14, 16, 17, 18], "1013": 14, "c39cba5": 14, "dirti": [14, 16, 17, 23, 28], "ceph": [14, 23, 25, 28], "jsonl": 14, "0324160024": 14, "65bfd8b584": 14, "jjlbn": 14, "bpe_model": [14, 16, 17, 18, 28], "sound_fil": [14, 23, 25, 28, 39], "sample_r": [14, 23, 25, 28, 39], "16000": [14, 23, 25, 26, 28, 32, 33, 36, 37], "max_context": 14, "max_stat": 14, "context_s": [14, 16, 17, 18, 25], "max_sym_per_fram": [14, 25], "simulate_stream": 14, "decode_chunk_s": 14, "left_context": 14, "dynamic_chunk_train": 14, "causal_convolut": 14, "short_chunk_s": [14, 18, 44, 45], "num_left_chunk": [14, 18], "blank_id": [14, 16, 17, 18, 25], "unk_id": 14, "vocab_s": [14, 16, 17, 18, 25], "271": [14, 17], "612": 14, "458": 14, "giga": [14, 17, 43], "623": 14, "277": 14, "78648040": 14, "951": [14, 28], "285": [14, 25, 28], "construct": [14, 16, 17, 18, 23, 25, 26, 28, 32, 36, 37, 39], "952": 14, "295": [14, 23, 25, 26, 28], "957": 14, "301": [14, 28], "700": 14, "329": [14, 17, 28], "912": 14, "388": 14, "earli": [14, 16, 17, 18, 28, 32], "nightfal": [14, 16, 17, 18, 28, 32], "THE": [14, 16, 17, 18, 28, 32], "yellow": [14, 16, 17, 18, 28, 32], "lamp": [14, 16, 17, 18, 28, 32], "light": [14, 16, 17, 18, 28, 32], "AND": [14, 16, 17, 18, 28, 32], "THERE": [14, 16, 17, 18, 28, 32], "squalid": [14, 16, 17, 18, 28, 32], "quarter": [14, 16, 17, 18, 28, 32], "OF": [14, 16, 17, 18, 28, 32], "brothel": [14, 16, 17, 18, 28, 32], "god": [14, 28, 32], "AS": [14, 28, 32], "direct": [14, 28, 32], "consequ": [14, 28, 32], "sin": [14, 28, 32], "man": [14, 28, 32], "punish": [14, 28, 32], "had": [14, 28, 32], "her": [14, 28, 32], "love": [14, 28, 32], "child": [14, 28, 32], "whose": [14, 25, 28, 32], "ON": [14, 16, 28, 32], "THAT": [14, 28, 32], "dishonor": [14, 28, 32], "bosom": [14, 28, 32], "TO": [14, 28, 32], "parent": [14, 28, 32], "forev": [14, 28, 32], "WITH": [14, 28, 32], "race": [14, 28, 32], "descent": [14, 28, 32], "mortal": [14, 28, 32], "BE": [14, 28, 32], "bless": [14, 28, 32], "soul": [14, 28, 32], "IN": [14, 28, 32], "heaven": [14, 28, 32], "yet": [14, 16, 17, 28, 32], "THESE": [14, 28, 32], "thought": [14, 28, 32], "affect": [14, 28, 32], "hester": [14, 28, 32], "prynn": [14, 28, 32], "hope": [14, 24, 28, 32], "apprehens": [14, 28, 32], "390": 14, "down": [14, 23, 28, 31, 33, 34, 43, 44, 45], "reproduc": [14, 28], "9999": [14, 33, 34, 43], "symlink": 14, "pass": [14, 18, 23, 25, 26, 28, 31, 33, 34, 41, 43, 44, 45], "reason": [14, 16, 17, 18, 44], "support": [15, 16, 17, 18, 23, 25, 28, 31, 33, 34, 41, 43, 44, 45], "zipform": [15, 19, 22, 27, 30, 40, 42], "convemform": [15, 22, 41], "platform": [15, 19], "android": [15, 16, 17, 18, 19], "raspberri": [15, 19], "pi": [15, 19], "\u7231\u82af\u6d3e": 15, "maix": 15, "iii": 15, "axera": 15, "rv1126": 15, "static": 15, "produc": [15, 31, 33, 34, 43, 44, 45], "binari": [15, 16, 17, 18, 23, 25, 26, 28, 31, 39, 43, 44], "everyth": 15, "pnnx": [15, 22], "torchscript": [15, 20, 21, 22], "encod": [15, 19, 21, 22, 23, 25, 26, 28, 31, 32, 33, 39, 41, 43, 44, 45], "int8": [15, 22], "quantiz": [15, 22, 29], "conv": [16, 17], "emform": [16, 17, 20], "stateless2": [16, 17, 43], "cpp": [16, 20], "pretrained_model": [16, 17, 18], "online_transduc": 16, "continu": [16, 17, 18, 19, 23, 25, 26, 28, 31, 33, 34, 39, 43, 44], "jit_xxx": [16, 17, 18], "anywher": [16, 17], "submodul": 16, "updat": [16, 17, 18], "recurs": 16, "init": 16, "dcmake_build_typ": [16, 23, 28], "dncnn_python": 16, "dncnn_build_benchmark": 16, "dncnn_build_exampl": 16, "dncnn_build_tool": 16, "j4": 16, "pwd": 16, "src": [16, 18], "compon": [16, 41], "ncnn2int8": [16, 17], "am": 16, "sai": [16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "later": [16, 17, 18, 23, 26, 28, 31, 32, 33, 34, 36, 37, 43, 44, 45], "termin": 16, "tencent": [16, 17], "modif": [16, 25], "offic": 16, "synchron": 16, "offici": 16, "renam": [16, 17, 18], "conv_emformer_transducer_stateless2": [16, 41], "length": [16, 18, 25, 44, 45], "cnn": [16, 18], "31": [16, 17, 18, 28], "context": [16, 25, 31, 41, 43, 44, 45], "configur": [16, 18, 25, 29, 32, 36, 37, 39], "accordingli": [16, 17, 18], "yourself": [16, 17, 18, 29, 44, 45], "combin": [16, 17, 18], "677": 16, "220": [16, 25, 26, 28], "229": [16, 23], "best_v": 16, "alid_epoch": 16, "subsampl": [16, 44, 45], "ing_factor": 16, "a34171ed85605b0926eebbd0463d059431f4f74a": 16, "dec": 16, "ver": 16, "ion": 16, "530e8a1": 16, "op": 16, "1220120619": [16, 17, 18], "7695ff496b": [16, 17, 18], "s9n4w": [16, 17, 18], "icefa": 16, "ll": 16, "transdu": 16, "cer": 16, "use_averaged_model": [16, 17, 18], "cnn_module_kernel": [16, 18], "left_context_length": 16, "chunk_length": 16, "right_context_length": 16, "memory_s": 16, "231": [16, 17, 18], "053": 16, "112": [16, 17, 18], "022": 16, "708": [16, 23, 25, 28, 39], "315": [16, 23, 25, 26, 28, 32], "75490012": 16, "318": [16, 17], "320": [16, 25], "682": 16, "lh": [16, 17, 18], "rw": [16, 17, 18], "root": [16, 17, 18], "289m": 16, "jan": [16, 17, 18], "289": 16, "roughli": [16, 17, 18], "equal": [16, 17, 18, 44, 45], "1024": [16, 17, 18, 43], "287": [16, 39], "1010k": [16, 17], "decoder_jit_trac": [16, 17, 18, 21, 43, 45], "283m": 16, "encoder_jit_trac": [16, 17, 18, 21, 43, 45], "0m": [16, 17], "joiner_jit_trac": [16, 17, 18, 21, 43, 45], "sure": [16, 17, 18], "could": [16, 17, 18, 23, 26], "found": [16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 39, 43, 44], "param": [16, 17, 18], "503k": [16, 17], "437": [16, 17, 18], "142m": 16, "79k": 16, "5m": [16, 17], "architectur": [16, 17, 18, 43], "editor": [16, 17, 18], "content": [16, 17, 18], "283": [16, 18], "1010": [16, 17], "142": [16, 23, 26, 28], "503": [16, 17], "convers": [16, 17, 18], "half": [16, 17, 18, 31, 44, 45], "v": [16, 17, 18, 28, 36, 37], "default": [16, 17, 18, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "float32": [16, 17, 18], "float16": [16, 17, 18], "occupi": [16, 17, 18], "twice": [16, 17, 18], "smaller": [16, 17, 18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "fp16": [16, 17, 18, 31, 33, 34, 43, 44, 45], "won": [16, 17, 18, 19, 23, 26, 28, 29, 31, 33, 34, 43, 44, 45], "accept": [16, 17, 18], "216": [16, 23, 28, 36, 37], "encoder_param_filenam": [16, 17, 18], "encoder_bin_filenam": [16, 17, 18], "decoder_param_filenam": [16, 17, 18], "decoder_bin_filenam": [16, 17, 18], "joiner_param_filenam": [16, 17, 18], "joiner_bin_filenam": [16, 17, 18], "sound_filenam": [16, 17, 18], "141": 16, "328": 16, "151": 16, "176": [16, 25, 28], "336": 16, "106000": [16, 17, 18, 28, 32], "581": [16, 32], "381": 16, "7767517": [16, 17, 18], "1060": 16, "1342": 16, "in0": [16, 17, 18], "explan": [16, 17, 18], "magic": [16, 17, 18], "intermedi": [16, 17, 18], "mean": [16, 17, 18, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 41, 43, 44, 45], "increment": [16, 17, 18], "1061": 16, "sherpametadata": [16, 17, 18], "sherpa_meta_data1": [16, 17, 18], "still": [16, 17, 18], "newli": [16, 17, 18], "must": [16, 17, 18, 44], "kei": [16, 17, 18, 28], "eas": [16, 17, 18], "list": [16, 17, 18, 23, 25, 26, 28, 32, 36, 37], "pair": [16, 17, 18], "sad": [16, 17, 18], "rememb": [16, 17, 18], "anymor": [16, 17, 18], "flexibl": [16, 17, 18], "edit": [16, 17, 18], "arm": [16, 17, 18], "aarch64": [16, 17, 18], "onc": [16, 17], "mayb": [16, 17], "year": [16, 17], "_jit_trac": [16, 17], "fp32": [16, 17], "doubl": [16, 17], "j": [16, 17, 23, 28], "py38": [16, 17, 18], "arg": [16, 17], "wave_filenam": [16, 17], "16k": [16, 17], "hz": [16, 17, 36, 37], "mono": [16, 17], "calibr": [16, 17], "cat": [16, 17], "eof": [16, 17], "calcul": [16, 17, 33, 44, 45], "has_gpu": [16, 17], "config": [16, 17], "use_vulkan_comput": [16, 17], "88": [16, 25], "conv_87": 16, "942385": [16, 17], "threshold": [16, 17, 33], "938493": 16, "968131": 16, "conv_88": 16, "442448": 16, "549335": 16, "167552": 16, "conv_89": 16, "228289": 16, "001738": 16, "871552": 16, "linear_90": 16, "976146": 16, "101789": 16, "115": [16, 17, 23, 28], "267128": 16, "linear_91": 16, "962030": 16, "162033": 16, "602713": 16, "linear_92": 16, "323041": 16, "853959": 16, "953129": 16, "linear_94": 16, "905416": 16, "648006": 16, "323545": 16, "linear_93": 16, "474093": 16, "200188": 16, "linear_95": 16, "888012": 16, "403563": 16, "483986": 16, "linear_96": 16, "856741": 16, "398679": 16, "524273": 16, "linear_97": 16, "635942": 16, "613655": 16, "590950": 16, "linear_98": 16, "460340": 16, "670146": 16, "398010": 16, "linear_99": 16, "532276": 16, "585537": 16, "119396": 16, "linear_101": 16, "585871": 16, "719224": 16, "205809": 16, "linear_100": 16, "751382": 16, "081648": 16, "linear_102": 16, "593344": 16, "450581": 16, "551147": 16, "linear_103": 16, "592681": 16, "705824": 16, "257959": 16, "linear_104": 16, "752957": 16, "980955": 16, "110489": 16, "linear_105": 16, "696240": 16, "877193": 16, "608953": 16, "linear_106": 16, "059659": 16, "643138": 16, "048950": 16, "linear_108": 16, "975461": 16, "589567": 16, "671457": 16, "linear_107": 16, "190381": 16, "515701": 16, "linear_109": 16, "710759": 16, "305635": 16, "082436": 16, "linear_110": 16, "531228": 16, "731162": 16, "159557": 16, "linear_111": 16, "528083": 16, "259322": 16, "211544": 16, "linear_112": 16, "148807": 16, "500842": 16, "087374": 16, "linear_113": 16, "592566": 16, "948851": 16, "166611": 16, "linear_115": 16, "437109": 16, "608947": 16, "642395": 16, "linear_114": 16, "193942": 16, "503904": 16, "linear_116": 16, "966980": 16, "200896": 16, "676392": 16, "linear_117": 16, "451303": 16, "061664": 16, "951344": 16, "linear_118": 16, "077262": 16, "965800": 16, "023804": 16, "linear_119": 16, "671615": 16, "847613": 16, "198460": 16, "linear_120": 16, "625638": 16, "131427": 16, "556595": 16, "linear_122": 16, "274080": 16, "888716": 16, "978189": 16, "linear_121": 16, "420480": 16, "429659": 16, "linear_123": 16, "826197": 16, "599617": 16, "281532": 16, "linear_124": 16, "396383": 16, "325849": 16, "335875": 16, "linear_125": 16, "337198": 16, "941410": 16, "221970": 16, "linear_126": 16, "699965": 16, "842878": 16, "224073": 16, "linear_127": 16, "775370": 16, "884215": 16, "696438": 16, "linear_129": 16, "872276": 16, "837319": 16, "254213": 16, "linear_128": 16, "180057": 16, "687883": 16, "linear_130": 16, "150427": 16, "454298": 16, "765789": 16, "linear_131": 16, "112692": 16, "924847": 16, "025545": 16, "linear_132": 16, "852893": 16, "116593": 16, "749626": 16, "linear_133": 16, "517084": 16, "024665": 16, "275314": 16, "linear_134": 16, "683807": 16, "878618": 16, "743618": 16, "linear_136": 16, "421055": 16, "322729": 16, "086264": 16, "linear_135": 16, "309880": 16, "917679": 16, "linear_137": 16, "827781": 16, "744595": 16, "915554": 16, "linear_138": 16, "422395": 16, "742882": 16, "402161": 16, "linear_139": 16, "527538": 16, "866123": 16, "849449": 16, "linear_140": 16, "128619": 16, "657793": 16, "266134": 16, "linear_141": 16, "839593": 16, "845993": 16, "021378": 16, "linear_143": 16, "442304": 16, "099039": 16, "889746": 16, "linear_142": 16, "325038": 16, "849592": 16, "linear_144": 16, "929444": 16, "618206": 16, "605080": 16, "linear_145": 16, "382126": 16, "321095": 16, "625010": 16, "linear_146": 16, "894987": 16, "867645": 16, "836517": 16, "linear_147": 16, "915313": 16, "906028": 16, "886522": 16, "linear_148": 16, "614287": 16, "908151": 16, "496181": 16, "linear_150": 16, "724932": 16, "485588": 16, "312899": 16, "linear_149": 16, "161146": 16, "606939": 16, "linear_151": 16, "164453": 16, "847355": 16, "719223": 16, "linear_152": 16, "086471": 16, "984121": 16, "222834": 16, "linear_153": 16, "099524": 16, "991601": 16, "816805": 16, "linear_154": 16, "054585": 16, "489706": 16, "286930": 16, "linear_155": 16, "389185": 16, "100321": 16, "963501": 16, "linear_157": 16, "982999": 16, "154796": 16, "637253": 16, "linear_156": 16, "537706": 16, "875190": 16, "linear_158": 16, "420287": 16, "502287": 16, "531588": 16, "linear_159": 16, "014746": 16, "423280": 16, "477261": 16, "linear_160": 16, "633553": 16, "715335": 16, "220921": 16, "linear_161": 16, "371849": 16, "117830": 16, "815203": 16, "linear_162": 16, "492933": 16, "126283": 16, "623318": 16, "linear_164": 16, "697504": 16, "825712": 16, "317358": 16, "linear_163": 16, "078367": 16, "008038": 16, "linear_165": 16, "023975": 16, "836278": 16, "577358": 16, "linear_166": 16, "860619": 16, "259792": 16, "493614": 16, "linear_167": 16, "380934": 16, "496160": 16, "107042": 16, "linear_168": 16, "691216": 16, "733317": 16, "831076": 16, "linear_169": 16, "723948": 16, "952728": 16, "129707": 16, "linear_171": 16, "034811": 16, "366547": 16, "665123": 16, "linear_170": 16, "356277": 16, "710501": 16, "linear_172": 16, "556884": 16, "729481": 16, "166058": 16, "linear_173": 16, "033039": 16, "207264": 16, "442120": 16, "linear_174": 16, "597379": 16, "658676": 16, "768131": 16, "linear_2": [16, 17], "293503": 16, "305265": 16, "877850": 16, "linear_1": [16, 17], "812222": 16, "766452": 16, "487047": 16, "linear_3": [16, 17], "999999": 16, "999755": 16, "031174": 16, "wish": [16, 17], "955k": 16, "18k": 16, "inparam": [16, 17], "inbin": [16, 17], "outparam": [16, 17], "outbin": [16, 17], "99m": 16, "78k": 16, "774k": [16, 17], "496": [16, 17, 28, 32], "replac": [16, 17], "774": [16, 17], "linear": [16, 17, 25], "convolut": [16, 17, 33, 41, 44], "exact": [16, 17], "4x": [16, 17], "comparison": 16, "468000": [17, 21, 43], "lstm_transducer_stateless2": [17, 21, 43], "862": 17, "222": [17, 26, 28], "865": 17, "is_pnnx": 17, "62e404dd3f3a811d73e424199b3408e309c06e1a": [17, 18], "mon": [17, 18], "6d7a559": [17, 18], "feb": [17, 18, 25], "147": [17, 18], "rnn_hidden_s": 17, "aux_layer_period": 17, "235": 17, "239": [17, 25], "472": 17, "595": 17, "324": 17, "83137520": 17, "596": 17, "325": 17, "257024": 17, "781812": 17, "327": 17, "84176356": 17, "182": [17, 18, 23, 32], "158": 17, "183": [17, 36, 37], "335": 17, "101": 17, "tracerwarn": [17, 18], "boolean": [17, 18], "caus": [17, 18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "incorrect": [17, 18, 25], "flow": [17, 18], "constant": [17, 18], "futur": [17, 18, 25, 46], "need_pad": 17, "bool": 17, "259": [17, 23], "180": [17, 23, 28], "339": 17, "304": 17, "207": [17, 26, 28], "84": [17, 23], "324m": 17, "321": [17, 23], "107": [17, 32], "318m": 17, "159m": 17, "21k": 17, "159": [17, 28, 39], "37": [17, 23, 25, 28, 36], "861": 17, "425": [17, 28], "266": [17, 18, 28, 32], "431": 17, "342": 17, "343": 17, "267": [17, 25, 36, 37], "379": 17, "268": [17, 28, 32], "317m": 17, "317": 17, "conv_15": 17, "930708": 17, "972025": 17, "conv_16": 17, "978855": 17, "031788": 17, "456645": 17, "conv_17": 17, "868437": 17, "830528": 17, "218575": 17, "linear_18": 17, "107259": 17, "194808": 17, "293236": 17, "linear_19": 17, "193777": 17, "634748": 17, "401705": 17, "linear_20": 17, "259933": 17, "606617": 17, "722160": 17, "linear_21": 17, "186600": 17, "790260": 17, "512129": 17, "linear_22": 17, "759041": 17, "265832": 17, "050053": 17, "linear_23": 17, "931209": 17, "099090": 17, "979767": 17, "linear_24": 17, "324160": 17, "215561": 17, "321835": 17, "linear_25": 17, "800708": 17, "599352": 17, "284134": 17, "linear_26": 17, "492444": 17, "153369": 17, "274391": 17, "linear_27": 17, "660161": 17, "720994": 17, "674126": 17, "linear_28": 17, "415265": 17, "174434": 17, "007133": 17, "linear_29": 17, "038418": 17, "118534": 17, "724262": 17, "linear_30": 17, "072084": 17, "936867": 17, "259155": 17, "linear_31": 17, "342712": 17, "599489": 17, "282787": 17, "linear_32": 17, "340535": 17, "120308": 17, "701103": 17, "linear_33": 17, "846987": 17, "630030": 17, "985939": 17, "linear_34": 17, "686298": 17, "204571": 17, "607586": 17, "linear_35": 17, "904821": 17, "575518": 17, "756420": 17, "linear_36": 17, "806659": 17, "585589": 17, "118401": 17, "linear_37": 17, "402340": 17, "047157": 17, "162680": 17, "linear_38": 17, "174589": 17, "923361": 17, "030258": 17, "linear_39": 17, "178576": 17, "556058": 17, "807705": 17, "linear_40": 17, "901954": 17, "301267": 17, "956539": 17, "linear_41": 17, "839805": 17, "597429": 17, "716181": 17, "linear_42": 17, "178945": 17, "651595": 17, "895699": 17, "829245": 17, "627592": 17, "637907": 17, "746186": 17, "255032": 17, "167313": 17, "000000": 17, "999756": 17, "031013": 17, "345k": 17, "17k": 17, "218m": 17, "counterpart": 17, "bit": [17, 23, 25, 26, 28, 32, 39], "4532": 17, "feedforward": [18, 25, 31, 44], "384": [18, 28], "192": [18, 28], "unmask": 18, "downsampl": [18, 24], "factor": [18, 23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "473": [18, 28], "246": [18, 25, 28, 36, 37], "477": 18, "warm_step": 18, "2000": [18, 26], "feedforward_dim": 18, "attention_dim": [18, 23, 25, 28], "encoder_unmasked_dim": 18, "zipformer_downsampling_factor": 18, "decode_chunk_len": 18, "257": [18, 25, 36, 37], "023": 18, "zipformer2": 18, "419": 18, "At": [18, 23, 28], "stack": 18, "downsampling_factor": 18, "037": 18, "655": 18, "346": 18, "68944004": 18, "347": 18, "260096": 18, "348": [18, 36], "716276": 18, "656": [18, 28], "349": 18, "69920376": 18, "351": 18, "353": 18, "174": [18, 28], "175": 18, "1344": 18, "assert": 18, "cached_len": 18, "num_lay": 18, "1348": 18, "cached_avg": 18, "1352": 18, "cached_kei": 18, "1356": 18, "cached_v": 18, "1360": 18, "cached_val2": 18, "1364": 18, "cached_conv1": 18, "1368": 18, "cached_conv2": 18, "1373": 18, "left_context_len": 18, "1884": 18, "x_size": 18, "2442": 18, "2449": 18, "2469": 18, "2473": 18, "2483": 18, "kv_len": 18, "k": [18, 31, 36, 37, 43, 44, 45], "2570": 18, "attn_output": 18, "bsz": 18, "num_head": 18, "seq_len": 18, "head_dim": 18, "2926": 18, "lorder": 18, "2652": 18, "2653": 18, "embed_dim": 18, "2666": 18, "1543": 18, "in_x_siz": 18, "1637": 18, "1643": 18, "in_channel": 18, "1571": 18, "1763": 18, "src1": 18, "src2": 18, "1779": 18, "dim1": 18, "1780": 18, "dim2": 18, "_trace": 18, "958": 18, "tracer": 18, "instead": [18, 25, 44], "tupl": 18, "namedtupl": 18, "absolut": 18, "know": [18, 29], "side": 18, "allow": [18, 31, 44], "behavior": [18, 25], "_c": 18, "_create_method_from_trac": 18, "646": 18, "357": 18, "102": [18, 23], "embedding_out": 18, "686": 18, "361": [18, 28, 32], "735": 18, "69": 18, "269m": 18, "269": [18, 23, 36, 37], "725": [18, 32], "1022k": 18, "266m": 18, "8m": 18, "509k": 18, "133m": 18, "152k": 18, "4m": 18, "1022": 18, "133": 18, "509": 18, "260": [18, 28], "360": 18, "365": 18, "280": [18, 28], "372": [18, 23], "state": [18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "026": 18, "410": 18, "411": [18, 28], "2028": 18, "2547": 18, "2029": 18, "23316": 18, "23317": 18, "23318": 18, "23319": 18, "23320": 18, "amount": [18, 24], "pad": [18, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "conv2dsubsampl": 18, "v2": [18, 23, 28], "arrai": 18, "23300": 18, "element": 18, "onnx_pretrain": 19, "onnxruntim": 19, "separ": 19, "deploi": [19, 23, 28], "repo_url": 19, "basenam": 19, "tree": [20, 21, 23, 25, 26, 28, 32, 36, 37, 39, 43], "cpu_jit": [20, 23, 28, 31, 33, 34, 44, 45], "confus": 20, "move": [20, 31, 33, 34, 44, 45], "why": 20, "streaming_asr": [20, 21, 43, 44, 45], "conv_emform": 20, "offline_asr": [20, 31], "jit_pretrain": [21, 33, 34, 43], "baz": 21, "1best": [23, 26, 28, 32, 33, 34, 36, 37], "automag": [23, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "stop": [23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "By": [23, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "musan": [23, 26, 28, 29, 31, 33, 34, 43, 44, 45], "thei": [23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "intal": [23, 26], "sudo": [23, 26], "apt": [23, 26], "permiss": [23, 26], "commandlin": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "quit": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "experi": [23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44, 45], "world": [23, 25, 26, 28, 29, 31, 32, 33, 34, 43, 44, 45], "multi": [23, 25, 26, 28, 29, 31, 33, 34, 41, 43, 44, 45], "machin": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "ddp": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "implement": [23, 25, 26, 28, 29, 31, 33, 34, 41, 43, 44, 45], "present": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "second": [23, 25, 26, 28, 29, 31, 33, 34, 39, 43, 44, 45], "utter": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "oom": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "due": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "decai": [23, 26, 28, 33, 34, 43], "warmup": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "function": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "get_param": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "directli": [23, 25, 26, 28, 29, 31, 33, 34, 43, 44, 45], "perturb": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "actual": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "3x150": [23, 25, 26], "450": [23, 25, 26], "visual": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "logdir": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "labelsmooth": 23, "someth": [23, 25, 26, 28, 31, 33, 34, 39, 43, 44], "tensorflow": [23, 25, 26, 28, 31, 33, 34, 39, 43, 44], "press": [23, 25, 26, 28, 31, 33, 34, 39, 43, 44, 45], "ctrl": [23, 25, 26, 28, 31, 33, 34, 39, 43, 44, 45], "engw8ksktzqs24zbv5dgcg": 23, "2021": [23, 26, 28, 32, 36, 37, 39], "22t11": 23, "scan": [23, 25, 26, 28, 31, 39, 43, 44], "116068": 23, "scalar": [23, 25, 26, 28, 31, 39, 43, 44], "listen": [23, 25, 26, 31, 39, 43, 44], "url": [23, 25, 26, 28, 31, 33, 34, 39, 43, 44], "xxxx": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "saw": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "consol": [23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "avoid": [23, 25, 28], "nbest": [23, 28, 34], "lattic": [23, 26, 28, 31, 32, 36, 37, 44, 45], "uniqu": [23, 28, 31, 44, 45], "pkufool": [23, 26, 32], "icefall_asr_aishell_conformer_ctc": 23, "transcrib": [23, 25, 26, 28], "v1": [23, 26, 28, 32, 36, 37], "lang_char": [23, 25], "bac009s0764w0121": [23, 25, 26], "bac009s0764w0122": [23, 25, 26], "bac009s0764w0123": [23, 25, 26], "tran": [23, 26, 28, 32, 36, 37], "graph": [23, 26, 28, 31, 32, 36, 37, 44, 45], "id": [23, 26, 28, 32, 36, 37], "conveni": [23, 26, 28, 29], "eo": [23, 26, 28], "soxi": [23, 25, 26, 28, 32, 39], "sampl": [23, 25, 26, 28, 32, 33, 39, 44, 45], "precis": [23, 25, 26, 28, 31, 32, 39, 44, 45], "67263": [23, 25, 26], "cdda": [23, 25, 26, 28, 32, 39], "sector": [23, 25, 26, 28, 32, 39], "135k": [23, 25, 26], "256k": [23, 25, 26, 28], "sign": [23, 25, 26, 28, 39], "integ": [23, 25, 26, 28, 39], "pcm": [23, 25, 26, 28, 39], "65840": [23, 25, 26], "308": [23, 25, 26], "625": [23, 25, 26], "132k": [23, 25, 26], "64000": [23, 25, 26], "300": [23, 25, 26, 28, 29, 31, 44], "128k": [23, 25, 26, 39], "displai": [23, 25, 26, 28], "topologi": [23, 28], "num_decoder_lay": [23, 28], "vgg_frontend": [23, 25, 28], "use_feat_batchnorm": [23, 28], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 23, "sun": 23, "sep": 23, "33cfe45": 23, "d57a873": 23, "nov": [23, 28], "hw": 23, "kangwei": 23, "icefall_aishell3": 23, "k2_releas": 23, "tokens_fil": 23, "words_fil": [23, 28, 39], "num_path": [23, 28, 31, 44, 45], "ngram_lm_scal": [23, 28], "attention_decoder_scal": [23, 28], "nbest_scal": [23, 28], "sos_id": [23, 28], "eos_id": [23, 28], "num_class": [23, 28, 39], "4336": [23, 25], "131": [23, 28], "134": 23, "138": [23, 25], "293": [23, 28], "369": [23, 28], "\u751a": [23, 25], "\u81f3": [23, 25], "\u51fa": [23, 25], "\u73b0": [23, 25], "\u4ea4": [23, 25], "\u6613": [23, 25], "\u51e0": [23, 25], "\u4e4e": [23, 25], "\u505c": [23, 25], "\u6b62": 23, "\u7684": [23, 25, 26], "\u60c5": [23, 25], "\u51b5": [23, 25], "\u4e00": [23, 25], "\u4e8c": [23, 25], "\u7ebf": [23, 25, 26], "\u57ce": [23, 25], "\u5e02": [23, 25], "\u867d": [23, 25], "\u7136": [23, 25], "\u4e5f": [23, 25, 26], "\u5904": [23, 25], "\u4e8e": [23, 25], "\u8c03": [23, 25], "\u6574": [23, 25], "\u4e2d": [23, 25, 26], "\u4f46": [23, 25, 26], "\u56e0": [23, 25], "\u4e3a": [23, 25], "\u805a": [23, 25], "\u96c6": [23, 25], "\u4e86": [23, 25, 26], "\u8fc7": [23, 25], "\u591a": [23, 25], "\u516c": [23, 25], "\u5171": [23, 25], "\u8d44": [23, 25], "\u6e90": [23, 25], "371": 23, "683": 23, "684": [23, 39], "651": [23, 39], "654": 23, "659": 23, "752": 23, "887": 23, "340": 23, "370": 23, "\u751a\u81f3": [23, 26], "\u51fa\u73b0": [23, 26], "\u4ea4\u6613": [23, 26], "\u51e0\u4e4e": [23, 26], "\u505c\u6b62": 23, "\u60c5\u51b5": [23, 26], "\u4e00\u4e8c": [23, 26], "\u57ce\u5e02": [23, 26], "\u867d\u7136": [23, 26], "\u5904\u4e8e": [23, 26], "\u8c03\u6574": [23, 26], "\u56e0\u4e3a": [23, 26], "\u805a\u96c6": [23, 26], "\u8fc7\u591a": [23, 26], "\u516c\u5171": [23, 26], "\u8d44\u6e90": [23, 26], "recor": [23, 28], "highest": [23, 28], "966": 23, "821": 23, "822": 23, "826": 23, "916": 23, "345": 23, "888": 23, "889": 23, "limit": [23, 25, 28, 41, 44], "upgrad": [23, 28], "pro": [23, 28], "finish": [23, 25, 26, 28, 29, 31, 32, 36, 37, 39, 44, 45], "NOT": [23, 25, 28, 39], "checkout": [23, 28], "hlg_decod": [23, 28], "four": [23, 28], "messag": [23, 28, 31, 33, 34, 43, 44, 45], "nn_model": [23, 28], "use_gpu": [23, 28], "word_tabl": [23, 28], "caution": [23, 28], "forward": [23, 28, 33], "cu": [23, 28], "int": [23, 28], "char": [23, 28], "98": 23, "150": [23, 28], "693": [23, 36], "165": [23, 28], "nnet_output": [23, 28], "185": [23, 28, 39], "217": [23, 28], "mandarin": 24, "beij": 24, "shell": 24, "technologi": 24, "ltd": 24, "peopl": 24, "accent": 24, "area": 24, "china": 24, "invit": 24, "particip": 24, "conduct": 24, "indoor": 24, "high": 24, "fidel": 24, "microphon": 24, "16khz": 24, "manual": 24, "through": 24, "profession": 24, "annot": 24, "inspect": 24, "free": [24, 29, 43], "academ": 24, "moder": 24, "research": 24, "field": 24, "openslr": 24, "ctc": [24, 27, 30, 34, 35, 38], "stateless": [24, 27, 31, 43, 44, 45], "conv1d": [25, 31, 43, 44, 45], "nn": [25, 31, 33, 34, 43, 44, 45], "tanh": 25, "borrow": 25, "ieeexplor": 25, "ieee": 25, "stamp": 25, "jsp": 25, "arnumb": 25, "9054419": 25, "predict": [25, 29, 31, 43, 44, 45], "charact": 25, "unit": 25, "vocabulari": 25, "87939824": 25, "optimized_transduc": 25, "technqiu": 25, "end": [25, 31, 33, 34, 39, 43, 44, 45], "furthermor": 25, "maximum": 25, "emit": 25, "per": [25, 31, 44, 45], "simplifi": [25, 41], "significantli": 25, "degrad": 25, "exactli": 25, "unprun": 25, "advantag": 25, "minim": 25, "pruned_transducer_stateless": [25, 31, 41, 44], "altern": 25, "though": 25, "transducer_stateless_modifi": 25, "pr": 25, "ram": 25, "tri": 25, "prob": [25, 43], "219": [25, 28], "lagz6hrcqxoigbfd5e0y3q": 25, "03t14": 25, "8477": 25, "250": [25, 32], "sym": [25, 31, 44, 45], "beam_search": [25, 31, 44, 45], "decoding_method": 25, "beam_4": 25, "ensur": 25, "give": 25, "poor": 25, "531": [25, 26], "994": [25, 28], "027": 25, "encoder_out_dim": 25, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 25, "50d2281": 25, "mar": 25, "0815224919": 25, "75d558775b": 25, "mmnv8": 25, "72": [25, 28], "248": 25, "878": [25, 37], "880": 25, "891": 25, "113": [25, 28], "userwarn": 25, "__floordiv__": 25, "round": 25, "toward": 25, "trunc": 25, "floor": 25, "keep": [25, 31, 44, 45], "div": 25, "b": [25, 28, 36, 37], "rounding_mod": 25, "divis": 25, "x_len": 25, "163": [25, 28], "\u6ede": 25, "322": 25, "759": 25, "760": 25, "919": 25, "922": 25, "929": 25, "046": 25, "047": 25, "319": [25, 28], "798": 25, "831": [25, 37], "214": [25, 28], "215": [25, 28, 32], "402": 25, "topk_hyp_index": 25, "topk_index": 25, "logit": 25, "583": [25, 37], "lji9mwuorlow3jkdhxwk8a": 26, "13t11": 26, "4454": 26, "icefall_asr_aishell_tdnn_lstm_ctc": 26, "858": [26, 28], "389": [26, 28], "154": 26, "161": [26, 28], "536": 26, "539": 26, "917": 26, "129": 26, "\u505c\u6ede": 26, "mmi": [27, 30], "blank": [27, 30], "skip": [27, 29, 30, 31, 43, 44, 45], "distil": [27, 30], "hubert": [27, 30], "ligru": [27, 35], "full": [28, 29, 31, 33, 34, 43, 44, 45], "libri": [28, 29, 31, 33, 34, 43, 44, 45], "subset": [28, 31, 33, 34, 43, 44, 45], "3x960": [28, 31, 33, 34, 43, 44, 45], "2880": [28, 31, 33, 34, 43, 44, 45], "lzgnetjwrxc3yghnmd4kpw": 28, "24t16": 28, "4540": 28, "sentenc": 28, "piec": 28, "And": [28, 31, 33, 34, 43, 44, 45], "neither": 28, "nor": 28, "5000": 28, "033": 28, "537": 28, "538": 28, "full_libri": [28, 29], "406": 28, "464": 28, "548": 28, "776": 28, "652": [28, 39], "109226120": 28, "714": [28, 36], "206": 28, "944": 28, "1328": 28, "443": [28, 32], "2563": 28, "494": 28, "592": 28, "1715": 28, "52576": 28, "128": 28, "1424": 28, "807": 28, "506": 28, "808": [28, 36], "362": 28, "1477": 28, "2922": 28, "208": 28, "4295": 28, "52343": 28, "396": 28, "3584": 28, "432": 28, "680": [28, 36], "_pickl": 28, "unpicklingerror": 28, "invalid": 28, "hlg_modifi": 28, "g_4_gram": [28, 32, 36, 37], "sentencepiec": 28, "875": [28, 32], "212k": 28, "267440": [28, 32], "1253": [28, 32], "535k": 28, "83": [28, 32], "77200": [28, 32], "154k": 28, "554": 28, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 28, "8d93169": 28, "601": 28, "025": 28, "broffel": 28, "osom": 28, "723": 28, "775": 28, "881": 28, "234": 28, "571": 28, "whole": [28, 32, 36, 37, 44, 45], "857": 28, "979": 28, "055": 28, "117": 28, "051": 28, "363": 28, "959": [28, 37], "546": 28, "598": 28, "599": [28, 32], "833": 28, "834": 28, "915": 28, "076": 28, "110": 28, "397": 28, "999": [28, 31, 44, 45], "concaten": 28, "bucket": 28, "sampler": 28, "1000": 28, "ctc_decod": 28, "ngram_lm_rescor": 28, "attention_rescor": 28, "kind": [28, 31, 33, 34, 43, 44, 45], "105": 28, "125": [28, 39], "136": 28, "228": 28, "144": 28, "543": 28, "topo": 28, "547": 28, "729": 28, "703": 28, "545": 28, "279": 28, "122": 28, "126": 28, "135": [28, 39], "153": [28, 39], "945": 28, "475": 28, "191": [28, 36, 37], "398": 28, "515": 28, "deseri": 28, "441": 28, "fsaclass": 28, "loadfsa": 28, "const": 28, "string": 28, "c10": 28, "ignor": 28, "dummi": 28, "589": 28, "attention_scal": 28, "162": 28, "169": [28, 36, 37], "188": 28, "984": 28, "624": 28, "519": [28, 37], "632": 28, "645": [28, 39], "243": 28, "303": 28, "179": 28, "knowledg": 29, "vector": 29, "mvq": 29, "kd": 29, "pruned_transducer_stateless4": [29, 31, 41, 44], "theoret": 29, "applic": 29, "minor": 29, "out": 29, "thing": 29, "distillation_with_hubert": 29, "Of": 29, "cours": 29, "xl": 29, "proce": 29, "960h": [29, 33], "use_extracted_codebook": 29, "augment": 29, "th": [29, 36, 37], "fine": 29, "embedding_lay": 29, "num_codebook": 29, "under": 29, "vq_fbank_layer36_cb8": 29, "whola": 29, "snippet": 29, "echo": 29, "awk": 29, "split": 29, "_": 29, "pruned_transducer_stateless6": 29, "12359": 29, "spec": 29, "aug": 29, "warp": 29, "paid": 29, "suitabl": [31, 43, 44, 45], "pruned_transducer_stateless2": [31, 41, 44], "pruned_transducer_stateless5": [31, 41, 44], "scroll": [31, 33, 34, 43, 44, 45], "arxiv": [31, 43, 44, 45], "2206": [31, 43, 44, 45], "13236": [31, 43, 44, 45], "rework": [31, 41, 44], "daniel": [31, 44, 45], "joint": [31, 43, 44, 45], "contrari": [31, 43, 44, 45], "convent": [31, 43, 44, 45], "recurr": [31, 43, 44, 45], "2x": [31, 44, 45], "littl": [31, 44], "436000": [31, 33, 34, 43, 44, 45], "438000": [31, 33, 34, 43, 44, 45], "qogspbgsr8kzcrmmie9jgw": 31, "20t15": [31, 43, 44], "4468": [31, 43, 44], "210171": [31, 43, 44], "access": [31, 33, 34, 43, 44, 45], "googl": [31, 33, 34, 43, 44, 45], "6008": [31, 33, 34, 43, 44, 45], "localhost": [31, 33, 34, 43, 44, 45], "expos": [31, 33, 34, 43, 44, 45], "proxi": [31, 33, 34, 43, 44, 45], "bind_al": [31, 33, 34, 43, 44, 45], "fast_beam_search": [31, 33, 43, 44, 45], "474000": [31, 43, 44, 45], "largest": [31, 44, 45], "posterior": [31, 33, 44, 45], "algorithm": [31, 44, 45], "pdf": [31, 34, 44, 45], "1211": [31, 44, 45], "3711": [31, 44, 45], "espnet": [31, 44, 45], "net": [31, 44, 45], "beam_search_transduc": [31, 44, 45], "basicli": [31, 44, 45], "topk": [31, 44, 45], "expand": [31, 44, 45], "mode": [31, 44, 45], "being": [31, 44, 45], "hardcod": [31, 44, 45], "composit": [31, 44, 45], "log_prob": [31, 44, 45], "hard": [31, 41, 44, 45], "2211": [31, 44, 45], "00484": [31, 44, 45], "fast_beam_search_lg": [31, 44, 45], "trivial": [31, 44, 45], "fast_beam_search_nbest": [31, 44, 45], "random_path": [31, 44, 45], "shortest": [31, 44, 45], "fast_beam_search_nbest_lg": [31, 44, 45], "logic": [31, 44, 45], "smallest": [31, 43, 44, 45], "normal": [32, 36, 37, 39, 44], "icefall_asr_librispeech_tdnn": 32, "lstm_ctc": 32, "flac": 32, "116k": 32, "140k": 32, "343k": 32, "164k": 32, "105k": 32, "174k": 32, "pretraind": 32, "170": 32, "584": [32, 37], "791": 32, "245": 32, "098": 32, "099": 32, "methond": [32, 36, 37], "403": 32, "631": 32, "190": 32, "121": 32, "010": 32, "guidanc": 33, "bigger": 33, "simpli": 33, "discard": 33, "prevent": 33, "lconv": 33, "encourag": [33, 34, 43], "stabil": [33, 34], "doesn": 33, "warm": [33, 34], "xyozukpeqm62hbilud4upa": [33, 34], "ctc_guide_decode_b": 33, "pretrained_ctc": 33, "jit_pretrained_ctc": 33, "100h": 33, "yfyeung": 33, "wechat": 34, "zipformer_mmi": 34, "worker": [34, 43], "hp": 34, "tdnn_ligru_ctc": 36, "enough": [36, 37, 39], "luomingshuang": [36, 37], "icefall_asr_timit_tdnn_ligru_ctc": 36, "pretrained_average_9_25": 36, "fdhc0_si1559": [36, 37], "felc0_si756": [36, 37], "fmgd0_si1564": [36, 37], "ffprobe": [36, 37], "show_format": [36, 37], "nistspher": [36, 37], "database_id": [36, 37], "database_vers": [36, 37], "utterance_id": [36, 37], "dhc0_si1559": [36, 37], "sample_min": [36, 37], "4176": [36, 37], "sample_max": [36, 37], "5984": [36, 37], "bitrat": [36, 37], "pcm_s16le": [36, 37], "s16": [36, 37], "elc0_si756": [36, 37], "1546": [36, 37], "1989": [36, 37], "mgd0_si1564": [36, 37], "7626": [36, 37], "10573": [36, 37], "660": 36, "695": 36, "697": 36, "819": 36, "829": 36, "sil": [36, 37], "dh": [36, 37], "ih": [36, 37], "uw": [36, 37], "ah": [36, 37], "ii": [36, 37], "z": [36, 37], "aa": [36, 37], "ei": [36, 37], "dx": [36, 37], "d": [36, 37], "uh": [36, 37], "ng": [36, 37], "eh": [36, 37], "jh": [36, 37], "er": [36, 37], "ai": [36, 37], "hh": [36, 37], "aw": 36, "ae": [36, 37], "705": 36, "715": 36, "720": 36, "251": [36, 37], "ch": 36, "icefall_asr_timit_tdnn_lstm_ctc": 37, "pretrained_average_16_25": 37, "816": 37, "827": 37, "unk": 37, "739": 37, "971": 37, "977": 37, "978": 37, "981": 37, "ow": 37, "ykubhb5wrmosxykid1z9eg": 39, "23t23": 39, "icefall_asr_yesno_tdnn": 39, "l_disambig": 39, "lexicon_disambig": 39, "0_0_0_1_0_0_0_1": 39, "0_0_1_0_0_0_1_0": 39, "0_0_1_0_0_1_1_1": 39, "0_0_1_0_1_0_0_1": 39, "0_0_1_1_0_0_0_1": 39, "0_0_1_1_0_1_1_0": 39, "0_0_1_1_1_0_0_0": 39, "0_0_1_1_1_1_0_0": 39, "0_1_0_0_0_1_0_0": 39, "0_1_0_0_1_0_1_0": 39, "0_1_0_1_0_0_0_0": 39, "0_1_0_1_1_1_0_0": 39, "0_1_1_0_0_1_1_1": 39, "0_1_1_1_0_0_1_0": 39, "0_1_1_1_1_0_1_0": 39, "1_0_0_0_0_0_0_0": 39, "1_0_0_0_0_0_1_1": 39, "1_0_0_1_0_1_1_1": 39, "1_0_1_1_0_1_1_1": 39, "1_0_1_1_1_1_0_1": 39, "1_1_0_0_0_1_1_1": 39, "1_1_0_0_1_0_1_1": 39, "1_1_0_1_0_1_0_0": 39, "1_1_0_1_1_0_0_1": 39, "1_1_0_1_1_1_1_0": 39, "1_1_1_0_0_1_0_1": 39, "1_1_1_0_1_0_1_0": 39, "1_1_1_1_0_0_1_0": 39, "1_1_1_1_1_0_0_0": 39, "1_1_1_1_1_1_1_1": 39, "54080": 39, "507": 39, "108k": 39, "ye": 39, "hebrew": 39, "NO": 39, "119": 39, "650": 39, "139": 39, "143": 39, "198": 39, "181": 39, "186": 39, "187": 39, "213": 39, "correctli": 39, "simplest": 39, "former": 41, "idea": 41, "mask": [41, 44, 45], "wenet": 41, "did": 41, "request": 41, "metion": 41, "complic": 41, "techniqu": 41, "bank": 41, "memor": 41, "histori": 41, "introduc": 41, "variant": 41, "pruned_stateless_emformer_rnnt2": 41, "conv_emformer_transducer_stateless": 41, "ourself": 41, "mechan": 41, "onlin": 43, "lstm_transducer_stateless": 43, "lower": 43, "prepare_giga_speech": 43, "cj2vtpiwqhkn9q1tx6ptpg": 43, "dynam": [44, 45], "causal": 44, "short": [44, 45], "2012": 44, "05481": 44, "flag": 44, "indic": [44, 45], "whether": 44, "sequenc": [44, 45], "uniformli": [44, 45], "seen": [44, 45], "97vkxf80ru61cnp2alwzzg": 44, "streaming_decod": [44, 45], "wise": [44, 45], "parallel": [44, 45], "bath": [44, 45], "parallelli": [44, 45], "seem": 44, "benefit": 44, "mdoel": 44, "320m": 45, "550": 45, "scriptmodul": 45, "jit_trace_export": 45, "jit_trace_pretrain": 45, "task": 46}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 14, 20, 21], "creat": [2, 13], "recip": [2, 46], "data": [2, 13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "prepar": [2, 13, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "train": [2, 10, 13, 16, 17, 18, 19, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "decod": [2, 5, 6, 7, 13, 14, 19, 23, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "pre": [2, 10, 16, 17, 18, 19, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "model": [2, 5, 10, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "lodr": [4, 6], "rnn": 4, "transduc": [4, 6, 7, 16, 17, 18, 25, 31, 43, 44, 45], "wer": [4, 5, 6, 7, 28], "differ": [4, 6, 7], "beam": [4, 6, 7, 25], "size": [4, 6, 7], "languag": 5, "lm": [5, 6, 28], "rescor": [5, 6, 23, 28], "base": [5, 6], "method": [5, 6], "v": [5, 6], "shallow": [5, 6, 7], "fusion": [5, 6, 7], "The": [5, 6, 25], "number": [5, 6], "each": [5, 6], "field": [5, 6], "i": [5, 6], "test": [5, 6, 7, 13, 16, 17, 18], "clean": [5, 6, 7], "other": [5, 6], "time": [5, 6, 7], "frequent": 8, "ask": 8, "question": 8, "faq": 8, "oserror": 8, "libtorch_hip": 8, "so": 8, "cannot": 8, "open": 8, "share": 8, "object": 8, "file": [8, 19], "directori": 8, "attributeerror": 8, "modul": 8, "distutil": 8, "ha": 8, "attribut": 8, "version": 8, "importerror": 8, "libpython3": 8, "10": 8, "1": [8, 13, 16, 17, 18, 23, 25, 26, 28], "0": [8, 13], "No": 8, "huggingfac": [9, 11], "space": 11, "youtub": [11, 13], "video": [11, 13], "icefal": [12, 13, 16, 17, 18], "content": [12, 46], "instal": [13, 16, 17, 18, 23, 25, 26, 28, 32, 36, 37], "cuda": 13, "toolkit": 13, "cudnn": 13, "torch": [13, 16, 17, 18, 20, 21, 31, 33, 34, 43, 44, 45], "torchaudio": 13, "2": [13, 16, 17, 18, 23, 25, 26, 28], "k2": 13, "3": [13, 16, 17, 18, 23, 25, 28], "lhots": 13, "4": [13, 16, 17, 18], "download": [13, 16, 17, 18, 19, 23, 25, 26, 28, 31, 32, 33, 34, 36, 37, 39, 43, 44, 45], "exampl": [13, 19, 23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "virtual": 13, "environ": 13, "5": [13, 16, 17, 18], "6": [13, 16, 17, 18], "your": 13, "export": [14, 15, 16, 17, 18, 19, 20, 21, 22, 31, 33, 34, 43, 44, 45], "state_dict": [14, 31, 33, 34, 43, 44, 45], "when": [14, 20, 21], "us": [14, 20, 21, 31, 33, 34, 43, 44, 45], "run": 14, "py": 14, "ncnn": [15, 16, 17, 18], "convemform": 16, "pnnx": [16, 17, 18], "via": [16, 17, 18], "jit": [16, 17, 18, 20, 21, 31, 33, 34, 43, 44, 45], "trace": [16, 17, 18, 21, 43, 45], "torchscript": [16, 17, 18], "modifi": [16, 17, 18, 25], "encod": [16, 17, 18], "sherpa": [16, 17, 18, 19, 31, 44, 45], "7": [16, 17], "option": [16, 17, 23, 26, 28, 31, 33, 34, 43, 44, 45], "int8": [16, 17], "quantiz": [16, 17], "lstm": [17, 26, 32, 37, 43], "stream": [18, 27, 40, 41, 44, 45], "zipform": [18, 33, 34, 45], "onnx": 19, "sound": 19, "script": [20, 31, 33, 34, 44, 45], "conform": [23, 28, 41], "ctc": [23, 26, 28, 32, 33, 36, 37, 39], "configur": [23, 26, 28, 31, 33, 34, 43, 44, 45], "log": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "usag": [23, 25, 26, 28, 31, 33, 34, 43, 44, 45], "case": [23, 25, 26, 28], "kaldifeat": [23, 25, 26, 28, 32, 36, 37, 39], "hlg": [23, 26, 28], "attent": [23, 28], "colab": [23, 25, 26, 28, 32, 36, 37, 39], "notebook": [23, 25, 26, 28, 32, 36, 37, 39], "deploy": [23, 28], "c": [23, 28], "aishel": 24, "stateless": 25, "loss": 25, "todo": 25, "greedi": 25, "search": 25, "tdnn": [26, 32, 36, 37, 39], "non": 27, "asr": [27, 40], "comput": 28, "n": 28, "gram": 28, "distil": 29, "hubert": 29, "codebook": 29, "index": 29, "librispeech": [30, 42], "prune": [31, 44], "statelessx": [31, 44], "pretrain": [31, 33, 34, 43, 44, 45], "deploi": [31, 44, 45], "infer": [32, 36, 37, 39], "blank": 33, "skip": 33, "mmi": 34, "timit": 35, "ligru": 36, "yesno": 38, "introduct": 41, "emform": 41, "which": 43, "simul": [44, 45], "real": [44, 45], "tabl": 46}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [25, "data-preparation"]], "Training": [[2, "training"], [13, "training"], [23, "training"], [25, "training"], [26, "training"], [28, "training"], [29, "training"], [31, "training"], [32, "training"], [33, "training"], [34, "training"], [36, "training"], [37, "training"], [39, "training"], [43, "training"], [44, "training"], [45, "training"]], "Decoding": [[2, "decoding"], [13, "decoding"], [23, "decoding"], [25, "decoding"], [26, "decoding"], [28, "decoding"], [29, "decoding"], [31, "decoding"], [32, "decoding"], [33, "decoding"], [34, "decoding"], [36, "decoding"], [37, "decoding"], [39, "decoding"], [43, "decoding"], [44, "decoding"], [45, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "LODR for RNN Transducer": [[4, "lodr-for-rnn-transducer"]], "WER of LODR with different beam sizes": [[4, "id1"]], "Decoding with language models": [[5, "decoding-with-language-models"]], "LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean)": [[5, "id1"], [6, "id3"]], "LM rescoring for Transducer": [[6, "lm-rescoring-for-transducer"]], "WERs of LM rescoring with different beam sizes": [[6, "id1"]], "WERs of LM rescoring + LODR with different beam sizes": [[6, "id2"]], "Shallow fusion for Transducer": [[7, "shallow-fusion-for-transducer"]], "WERs and decoding time (on test-clean) of shallow fusion with different beam sizes": [[7, "id2"]], "Frequently Asked Questions (FAQs)": [[8, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[8, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[8, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[8, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "Huggingface": [[9, "huggingface"]], "Pre-trained models": [[10, "pre-trained-models"]], "Huggingface spaces": [[11, "huggingface-spaces"]], "YouTube Video": [[11, "youtube-video"], [13, "youtube-video"]], "Icefall": [[12, "icefall"]], "Contents:": [[12, null]], "Installation": [[13, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[13, "install-cuda-toolkit-and-cudnn"]], "(1) Install torch and torchaudio": [[13, "install-torch-and-torchaudio"]], "(2) Install k2": [[13, "install-k2"]], "(3) Install lhotse": [[13, "install-lhotse"]], "(4) Download icefall": [[13, "download-icefall"]], "Installation example": [[13, "installation-example"]], "(1) Create a virtual environment": [[13, "create-a-virtual-environment"]], "(2) Install CUDA toolkit and cuDNN": [[13, "id1"]], "(3) Install torch and torchaudio": [[13, "id2"]], "(4) Install k2": [[13, "id3"]], "(5) Install lhotse": [[13, "id5"]], "(6) Download icefall": [[13, "id6"]], "Test Your Installation": [[13, "test-your-installation"]], "Data preparation": [[13, "data-preparation"], [23, "data-preparation"], [26, "data-preparation"], [28, "data-preparation"], [29, "data-preparation"], [31, "data-preparation"], [32, "data-preparation"], [33, "data-preparation"], [34, "data-preparation"], [36, "data-preparation"], [37, "data-preparation"], [39, "data-preparation"], [43, "data-preparation"], [44, "data-preparation"], [45, "data-preparation"]], "Export model.state_dict()": [[14, "export-model-state-dict"], [31, "export-model-state-dict"], [33, "export-model-state-dict"], [34, "export-model-state-dict"], [43, "export-model-state-dict"], [44, "export-model-state-dict"], [45, "export-model-state-dict"]], "When to use it": [[14, "when-to-use-it"], [20, "when-to-use-it"], [21, "when-to-use-it"]], "How to export": [[14, "how-to-export"], [20, "how-to-export"], [21, "how-to-export"]], "How to use the exported model": [[14, "how-to-use-the-exported-model"], [20, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[14, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[15, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[16, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[16, "download-the-pre-trained-model"], [17, "download-the-pre-trained-model"], [18, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[16, "install-ncnn-and-pnnx"], [17, "install-ncnn-and-pnnx"], [18, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[16, "export-the-model-via-torch-jit-trace"], [17, "export-the-model-via-torch-jit-trace"], [18, "export-the-model-via-torch-jit-trace"]], "4. Export torchscript model via pnnx": [[16, "export-torchscript-model-via-pnnx"], [17, "export-torchscript-model-via-pnnx"], [18, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[16, "test-the-exported-models-in-icefall"], [17, "test-the-exported-models-in-icefall"], [18, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[16, "modify-the-exported-encoder-for-sherpa-ncnn"], [17, "modify-the-exported-encoder-for-sherpa-ncnn"], [18, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[16, "optional-int8-quantization-with-sherpa-ncnn"], [17, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[17, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[18, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[19, "export-to-onnx"]], "sherpa-onnx": [[19, "sherpa-onnx"]], "Example": [[19, "example"]], "Download the pre-trained model": [[19, "download-the-pre-trained-model"], [23, "download-the-pre-trained-model"], [25, "download-the-pre-trained-model"], [26, "download-the-pre-trained-model"], [28, "download-the-pre-trained-model"], [32, "download-the-pre-trained-model"], [36, "download-the-pre-trained-model"], [37, "download-the-pre-trained-model"], [39, "download-the-pre-trained-model"]], "Export the model to ONNX": [[19, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[19, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[20, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[21, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[21, "how-to-use-the-exported-models"]], "Model export": [[22, "model-export"]], "Conformer CTC": [[23, "conformer-ctc"], [28, "conformer-ctc"]], "Configurable options": [[23, "configurable-options"], [26, "configurable-options"], [28, "configurable-options"], [31, "configurable-options"], [33, "configurable-options"], [34, "configurable-options"], [43, "configurable-options"], [44, "configurable-options"], [45, "configurable-options"]], "Pre-configured options": [[23, "pre-configured-options"], [26, "pre-configured-options"], [28, "pre-configured-options"], [31, "pre-configured-options"], [33, "pre-configured-options"], [34, "pre-configured-options"], [43, "pre-configured-options"], [44, "pre-configured-options"], [45, "pre-configured-options"]], "Training logs": [[23, "training-logs"], [25, "training-logs"], [26, "training-logs"], [28, "training-logs"], [31, "training-logs"], [33, "training-logs"], [34, "training-logs"], [43, "training-logs"], [44, "training-logs"], [45, "training-logs"]], "Usage examples": [[23, "usage-examples"], [25, "usage-examples"], [26, "usage-examples"], [28, "usage-examples"]], "Case 1": [[23, "case-1"], [25, "case-1"], [26, "case-1"], [28, "case-1"]], "Case 2": [[23, "case-2"], [25, "case-2"], [26, "case-2"], [28, "case-2"]], "Case 3": [[23, "case-3"], [25, "case-3"], [28, "case-3"]], "Pre-trained Model": [[23, "pre-trained-model"], [25, "pre-trained-model"], [26, "pre-trained-model"], [28, "pre-trained-model"], [32, "pre-trained-model"], [36, "pre-trained-model"], [37, "pre-trained-model"], [39, "pre-trained-model"]], "Install kaldifeat": [[23, "install-kaldifeat"], [25, "install-kaldifeat"], [26, "install-kaldifeat"], [28, "install-kaldifeat"], [32, "install-kaldifeat"], [36, "install-kaldifeat"], [37, "install-kaldifeat"]], "Usage": [[23, "usage"], [25, "usage"], [26, "usage"], [28, "usage"]], "CTC decoding": [[23, "ctc-decoding"], [28, "ctc-decoding"], [28, "id2"]], "HLG decoding": [[23, "hlg-decoding"], [23, "id2"], [26, "hlg-decoding"], [28, "hlg-decoding"], [28, "id3"]], "HLG decoding + attention decoder rescoring": [[23, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[23, "colab-notebook"], [25, "colab-notebook"], [26, "colab-notebook"], [28, "colab-notebook"], [32, "colab-notebook"], [36, "colab-notebook"], [37, "colab-notebook"], [39, "colab-notebook"]], "Deployment with C++": [[23, "deployment-with-c"], [28, "deployment-with-c"]], "aishell": [[24, "aishell"]], "Stateless Transducer": [[25, "stateless-transducer"]], "The Model": [[25, "the-model"]], "The Loss": [[25, "the-loss"]], "Todo": [[25, "id1"]], "Greedy search": [[25, "greedy-search"]], "Beam search": [[25, "beam-search"]], "Modified Beam search": [[25, "modified-beam-search"]], "TDNN-LSTM CTC": [[26, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[27, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[28, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[28, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[28, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[28, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[28, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[29, "distillation-with-hubert"]], "Codebook index preparation": [[29, "codebook-index-preparation"]], "LibriSpeech": [[30, "librispeech"], [42, "librispeech"]], "Pruned transducer statelessX": [[31, "pruned-transducer-statelessx"], [44, "pruned-transducer-statelessx"]], "Usage example": [[31, "usage-example"], [33, "usage-example"], [34, "usage-example"], [43, "usage-example"], [44, "usage-example"], [45, "usage-example"]], "Export Model": [[31, "export-model"], [44, "export-model"], [45, "export-model"]], "Export model using torch.jit.script()": [[31, "export-model-using-torch-jit-script"], [33, "export-model-using-torch-jit-script"], [34, "export-model-using-torch-jit-script"], [44, "export-model-using-torch-jit-script"], [45, "export-model-using-torch-jit-script"]], "Download pretrained models": [[31, "download-pretrained-models"], [33, "download-pretrained-models"], [34, "download-pretrained-models"], [43, "download-pretrained-models"], [44, "download-pretrained-models"], [45, "download-pretrained-models"]], "Deploy with Sherpa": [[31, "deploy-with-sherpa"], [44, "deploy-with-sherpa"], [45, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[32, "tdnn-lstm-ctc"], [37, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[32, "inference-with-a-pre-trained-model"], [36, "inference-with-a-pre-trained-model"], [37, "inference-with-a-pre-trained-model"], [39, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[33, "zipformer-ctc-blank-skip"]], "Export models": [[33, "export-models"], [34, "export-models"], [43, "export-models"]], "Zipformer MMI": [[34, "zipformer-mmi"]], "TIMIT": [[35, "timit"]], "TDNN-LiGRU-CTC": [[36, "tdnn-ligru-ctc"]], "YesNo": [[38, "yesno"]], "TDNN-CTC": [[39, "tdnn-ctc"]], "Download kaldifeat": [[39, "download-kaldifeat"]], "Streaming ASR": [[40, "streaming-asr"]], "Introduction": [[41, "introduction"]], "Streaming Conformer": [[41, "streaming-conformer"]], "Streaming Emformer": [[41, "streaming-emformer"]], "LSTM Transducer": [[43, "lstm-transducer"]], "Which model to use": [[43, "which-model-to-use"]], "Export model using torch.jit.trace()": [[43, "export-model-using-torch-jit-trace"], [45, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[44, "simulate-streaming-decoding"], [45, "simulate-streaming-decoding"]], "Real streaming decoding": [[44, "real-streaming-decoding"], [45, "real-streaming-decoding"]], "Zipformer Transducer": [[45, "zipformer-transducer"]], "Recipes": [[46, "recipes"]], "Table of Contents": [[46, null]]}, "indexentries": {}}) |