icefall/searchindex.js

1 line
92 KiB
JavaScript

Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "faqs", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "faqs.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Frequently Asked Questions (FAQs)", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40, 41], "us": [0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 14, 17, 18, 19, 20, 21, 23, 24, 27, 31, 32, 34, 36], "tool": [0, 4, 12], "make": [0, 1, 3, 12, 13, 18, 20, 23, 36], "consist": [0, 20, 26, 38, 39, 40], "possibl": [0, 2, 3, 9, 18, 23], "black": 0, "format": [0, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "flake8": 0, "check": [0, 23], "qualiti": [0, 19], "isort": 0, "sort": [0, 9], "import": [0, 4, 12, 39, 40], "The": [0, 1, 2, 4, 7, 9, 10, 12, 13, 18, 19, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "version": [0, 8, 9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 31, 32, 39], "abov": [0, 4, 9, 10, 12, 13, 14, 18, 19, 20, 21, 23, 26, 28, 29, 34, 36, 38, 39, 40], "ar": [0, 1, 3, 4, 9, 10, 12, 13, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40, 41], "22": [0, 12, 13, 23, 31, 32, 34], "3": [0, 4, 8, 10, 11, 14, 17, 21, 24, 26, 27, 28, 29, 34, 38, 39, 40], "0": [0, 1, 8, 10, 12, 13, 14, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "5": [0, 11, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "4": [0, 4, 10, 11, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "10": [0, 8, 9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "1": [0, 8, 10, 11, 14, 15, 16, 17, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "after": [0, 1, 7, 9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "run": [0, 2, 4, 7, 9, 12, 13, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "command": [0, 1, 4, 9, 10, 12, 13, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "git": [0, 9, 10, 12, 13, 14, 18, 20, 21, 23, 27, 31, 32, 34], "clone": [0, 9, 10, 12, 13, 14, 18, 20, 21, 23, 27, 31, 32, 34], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "github": [0, 2, 6, 9, 10, 11, 12, 13, 15, 16, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "com": [0, 2, 6, 7, 9, 10, 12, 13, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "k2": [0, 2, 4, 6, 7, 8, 10, 11, 12, 13, 15, 16, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 38, 39, 40], "fsa": [0, 2, 6, 7, 9, 10, 11, 12, 13, 15, 16, 18, 20, 23, 26, 28, 29, 38, 39, 40], "icefal": [0, 2, 3, 4, 6, 7, 10, 11, 14, 15, 16, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40, 41], "cd": [0, 1, 2, 4, 9, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "pip": [0, 1, 4, 9, 12, 20], "instal": [0, 1, 4, 5, 7, 8, 10, 11, 14, 17, 24, 26, 28, 29, 34, 38, 39, 40], "pre": [0, 3, 5, 7, 8, 9, 11, 17, 24], "commit": 0, "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 9, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "automat": [0, 7, 24], "hook": 0, "invok": 0, "fail": [0, 9], "If": [0, 2, 4, 7, 9, 12, 13, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "ani": [0, 9, 18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39], "your": [0, 1, 2, 5, 7, 8, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "wa": [0, 9, 10, 23, 27], "success": [0, 9, 12, 13], "pleas": [0, 1, 2, 4, 7, 9, 11, 12, 13, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "fix": [0, 4, 9, 12, 13, 23], "issu": [0, 4, 9, 12, 13, 23, 24, 39, 40], "report": [0, 4, 9, 24], "some": [0, 1, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "i": [0, 1, 2, 4, 7, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "e": [0, 2, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "modifi": [0, 11, 17, 18, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "file": [0, 2, 7, 8, 10, 12, 13, 15, 16, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "place": [0, 9, 10, 20, 23, 27], "so": [0, 7, 8, 9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "statu": 0, "failur": 0, "see": [0, 1, 7, 9, 12, 13, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "which": [0, 2, 7, 10, 12, 13, 14, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 39, 40], "ha": [0, 2, 8, 11, 12, 13, 14, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 36, 38, 39, 40], "been": [0, 11, 12, 13, 20], "befor": [0, 1, 10, 12, 13, 15, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "further": 0, "chang": [0, 4, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "all": [0, 6, 7, 10, 12, 13, 15, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "again": [0, 12, 13, 34], "should": [0, 2, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "succe": 0, "thi": [0, 2, 3, 4, 5, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40, 41], "time": [0, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "succeed": 0, "want": [0, 9, 10, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "can": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "do": [0, 2, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "Or": 0, "without": [0, 5, 7, 18, 23], "your_changed_fil": 0, "py": [0, 2, 4, 9, 12, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 6, 7, 9, 10, 12, 13, 14, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "prepar": [1, 3, 10], "environ": [1, 4, 12, 13, 18, 19, 20, 21, 23, 24, 26, 27, 31, 32, 34, 39, 40], "doc": [1, 10, 36], "r": [1, 9, 12, 13, 31, 32], "requir": [1, 9, 24, 39, 40], "txt": [1, 9, 12, 13, 14, 18, 20, 21, 23, 27, 31, 32, 34], "set": [1, 4, 9, 12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39, 40], "up": [1, 9, 10, 12, 13, 18, 21, 23, 24, 26, 27, 28, 29, 39, 40], "readi": [1, 18, 23, 24], "refer": [1, 2, 9, 10, 11, 12, 13, 15, 16, 18, 20, 21, 23, 26, 27, 28, 31, 32, 34, 36, 39, 40], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 10, 12, 13, 18, 20, 23], "local": [1, 9, 26, 28, 29, 38, 39, 40], "preview": 1, "what": [1, 2, 9, 12, 13, 20, 36], "look": [1, 2, 6, 9, 12, 13, 18, 20, 21, 23, 24], "like": [1, 2, 7, 9, 12, 13, 18, 20, 21, 23, 26, 28, 29, 34, 36, 38, 39], "publish": [1, 10, 19], "html": [1, 2, 4, 9, 11, 12, 13, 15, 16, 26, 38, 39, 40], "gener": [1, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "view": [1, 12, 13, 18, 20, 21, 23, 26, 28, 29, 34, 38, 39, 40], "follow": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "python3": [1, 4, 9, 13], "m": [1, 12, 13, 20, 26, 28, 29, 31, 32, 38, 39, 40], "server": [1, 7, 9, 38], "It": [1, 2, 5, 9, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "print": [1, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "serv": [1, 26, 28, 29, 38, 39, 40], "port": [1, 24, 26, 28, 29, 38, 39, 40], "8000": [1, 34], "open": [1, 8, 10, 12, 13, 19, 20, 23, 24], "browser": [1, 5, 7, 26, 28, 29, 38, 39, 40], "go": [1, 18, 20, 23, 26, 28, 29, 38, 39, 40], "read": [2, 9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "code": [2, 3, 4, 8, 12, 13, 18, 23, 24, 26, 27, 31, 32, 34, 36, 39, 40], "style": [2, 3, 8], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 9, 10, 12, 13, 14, 15, 16, 18, 20, 23, 26, 28, 29, 38, 39, 40], "recommend": [2, 9, 18, 20, 21, 23, 24, 26, 39, 40], "test": [2, 8, 10, 11, 17, 18, 20, 21, 23, 24, 27, 28, 31, 32], "valid": [2, 9, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "dataset": [2, 4, 9, 10, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "lhots": [2, 8, 10, 12, 13, 18, 20, 23], "readthedoc": [2, 9], "io": [2, 9, 11, 12, 13, 15, 16, 26, 38, 39, 40], "en": [2, 9, 12], "latest": [2, 7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "index": [2, 9, 11, 12, 13, 15, 16, 38, 39, 40], "yesno": [2, 4, 8, 9, 22, 34, 41], "veri": [2, 3, 12, 13, 20, 31, 32, 34, 39, 40], "good": 2, "exampl": [2, 7, 8, 10, 12, 13, 15, 16, 17, 24, 27, 31, 32, 34], "speech": [2, 7, 8, 9, 11, 19, 20, 34, 41], "pull": [2, 12, 13, 14, 18, 20, 23, 36], "380": [2, 12, 32], "show": [2, 7, 9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "add": [2, 12, 13, 18, 20, 21, 39, 41], "new": [2, 3, 7, 9, 12, 13, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 34, 38, 39, 40], "suppos": [2, 39, 40], "would": [2, 9, 10, 12, 13, 23, 27, 39, 40], "name": [2, 4, 10, 12, 13, 14, 18, 20, 26, 28, 29, 39, 40], "foo": [2, 16, 18, 23, 26, 28, 29, 38, 39, 40], "eg": [2, 4, 6, 9, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "mkdir": [2, 12, 13, 18, 20, 21, 23, 27, 31, 32, 34], "p": [2, 9, 12, 13, 20, 31, 32], "asr": [2, 4, 6, 8, 9, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40, 41], "touch": 2, "sh": [2, 9, 10, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "chmod": 2, "x": [2, 36], "simpl": [2, 20], "own": [2, 24, 26, 39, 40], "otherwis": [2, 12, 13, 18, 20, 23, 24, 26, 28, 29, 38, 39, 40], "librispeech": [2, 4, 6, 8, 10, 12, 13, 14, 15, 16, 22, 23, 24, 26, 27, 28, 29, 35, 36, 38, 39, 40, 41], "assum": [2, 9, 10, 12, 13, 14, 18, 20, 21, 23, 24, 26, 27, 31, 32, 34, 38, 39, 40], "fanci": 2, "call": [2, 4, 14, 24], "bar": [2, 16, 18, 23, 26, 28, 29, 38, 39, 40], "organ": 2, "wai": [2, 3, 17, 26, 28, 29, 36, 38, 39, 40], "readm": [2, 18, 20, 21, 23, 27, 31, 32, 34], "md": [2, 6, 10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "asr_datamodul": [2, 4, 9], "pretrain": [2, 10, 12, 13, 14, 16, 18, 20, 21, 23, 27, 31, 32, 34], "For": [2, 4, 6, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "instanc": [2, 4, 6, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "tdnn": [2, 4, 9, 19, 22, 25, 30, 33], "its": [2, 10, 11, 12, 13, 16, 20, 28], "directori": [2, 8, 9, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "structur": 2, "descript": [2, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "contain": [2, 8, 10, 11, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40, 41], "inform": [2, 10, 18, 20, 21, 23, 26, 27, 28, 31, 32, 34, 36, 38, 39, 40], "g": [2, 9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "wer": [2, 9, 10, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "etc": [2, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "provid": [2, 7, 9, 10, 11, 12, 13, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40, 41], "pytorch": [2, 4, 8, 12, 13, 20], "dataload": [2, 9], "take": [2, 10, 24, 26, 34, 39, 40], "input": [2, 10, 12, 13, 18, 20, 21, 23, 27, 31, 32, 34, 36], "checkpoint": [2, 9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "save": [2, 9, 10, 13, 15, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "dure": [2, 4, 7, 10, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "stage": [2, 9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "": [2, 9, 10, 12, 13, 14, 15, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "definit": [2, 12, 13], "neural": [2, 18, 23], "network": [2, 18, 20, 23, 26, 28, 29, 38, 39, 40], "script": [2, 8, 9, 16, 17, 18, 20, 21, 23, 24, 27, 31, 32, 34, 38], "infer": [2, 10, 12, 13], "tdnn_lstm_ctc": [2, 21, 27, 32], "conformer_ctc": [2, 18, 23], "get": [2, 7, 9, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 34, 36, 38, 39, 40], "feel": [2, 24, 38], "result": [2, 6, 7, 9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "everi": [2, 10, 26, 28, 29, 38, 39, 40], "kept": [2, 26, 39, 40], "self": [2, 11, 36], "toler": 2, "duplic": 2, "among": [2, 9], "differ": [2, 9, 12, 13, 14, 18, 19, 23, 24, 26, 36, 38, 39, 40], "invoc": [2, 12, 13], "help": [2, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "blob": [2, 6, 10, 16, 26, 28, 29, 38, 39, 40], "master": [2, 6, 10, 13, 15, 16, 20, 24, 26, 28, 29, 38, 39, 40], "transform": [2, 18, 23, 38], "conform": [2, 15, 19, 20, 22, 25, 26, 28, 38, 39, 40], "base": [2, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "lstm": [2, 11, 16, 17, 19, 22, 25, 30, 35, 37], "attent": [2, 20, 21, 24, 36, 39, 40], "lm": [2, 9, 20, 26, 27, 31, 32, 34, 39, 40], "rescor": [2, 21, 27, 29, 31, 32, 34], "demonstr": [2, 5, 7, 10, 14], "consid": 2, "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 12, 13, 14, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "mani": [3, 39, 40], "two": [3, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "them": [3, 5, 6, 7, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "To": [3, 7, 9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "document": [3, 8, 10, 11, 12, 13, 29], "repositori": [3, 12, 13], "recip": [3, 6, 8, 9, 10, 14, 18, 20, 21, 23, 24, 26, 27, 31, 32, 34, 36, 38, 39, 40], "In": [3, 4, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 23, 24, 27, 31, 32, 34, 36], "page": [3, 7, 15, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40, 41], "describ": [3, 5, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 23, 26, 27, 31, 32, 39, 40], "how": [3, 5, 7, 8, 9, 12, 13, 14, 17, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "creat": [3, 8, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39], "data": [3, 10, 12, 13, 14, 15, 16, 19], "train": [3, 4, 5, 7, 8, 10, 11, 15, 16, 17, 36], "decod": [3, 4, 7, 12, 13, 16, 17], "model": [3, 5, 7, 8, 9, 11, 24, 36], "section": [4, 5, 9, 10, 14, 15, 16, 17, 18, 23], "collect": [4, 9], "user": 4, "post": 4, "correspond": [4, 6, 7], "solut": 4, "One": 4, "torch": [4, 8, 9, 10, 11, 17, 18, 20, 23], "torchaudio": [4, 8, 36], "cu111": 4, "torchvis": 4, "11": [4, 9, 12, 13, 14, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "f": [4, 9, 31, 32], "download": [4, 7, 8, 11, 17, 19, 24], "org": [4, 9, 19, 20, 26, 38, 39, 40], "whl": [4, 9], "torch_stabl": 4, "throw": [4, 12, 13], "error": [4, 9, 12, 13, 23], "when": [4, 7, 12, 13, 17, 20, 23, 24, 26, 28, 29, 39, 40], "specifi": [4, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "cuda": [4, 9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 38, 39, 40], "while": [4, 9, 12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "That": [4, 12, 13, 24, 26, 38, 39, 40], "cu11": 4, "therefor": 4, "correct": 4, "log": [4, 9, 12, 13, 27, 31, 32, 34], "traceback": 4, "most": [4, 39, 40], "recent": [4, 12, 13], "last": 4, "line": [4, 9, 12, 13, 26, 39, 40], "14": [4, 9, 10, 12, 13, 15, 18, 23, 26, 27, 28, 31, 38, 39, 40], "from": [4, 5, 7, 9, 10, 12, 13, 14, 18, 19, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "yesnoasrdatamodul": 4, "home": [4, 12, 13, 18, 23], "xxx": [4, 10, 12, 13], "next": [4, 7, 9, 12, 13, 23, 24, 26, 27, 28, 29, 38, 39, 40], "gen": [4, 7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "kaldi": [4, 7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "34": [4, 9, 12, 13], "datamodul": 4, "__init__": [4, 10, 12, 13, 18, 20, 23], "23": [4, 9, 12, 13, 18, 20, 21, 23, 31, 32, 34], "util": [4, 9, 23], "add_eo": 4, "add_so": 4, "get_text": 4, "39": [4, 9, 12, 20, 23, 27, 31], "tensorboard": [4, 9, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "summarywrit": 4, "miniconda3": 4, "env": 4, "yyi": 4, "lib": [4, 9], "8": [4, 9, 10, 12, 13, 18, 20, 23, 24, 26, 27, 28, 29, 34, 38, 39, 40], "site": [4, 9], "packag": [4, 9], "loosevers": 4, "uninstal": 4, "setuptool": [4, 9], "58": [4, 23], "conda": 4, "encount": [4, 9, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "dev": [4, 9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "yangyifan": 4, "anaconda3": 4, "dev20230112": 4, "cuda11": 4, "6": [4, 9, 11, 17, 18, 20, 23, 26, 27, 31, 32, 38], "torch1": [4, 9], "13": [4, 9, 10, 12, 13, 20, 21, 23, 27, 28, 31], "py3": [4, 9], "linux": [4, 7, 11, 12, 13], "x86_64": [4, 12], "egg": 4, "24": [4, 9, 12, 13, 21, 27, 31, 32, 34], "_k2": [4, 9], "determinizeweightpushingtyp": 4, "handl": [4, 18, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "except": [4, 10], "anoth": 4, "occur": 4, "pruned_transducer_stateless7_ctc_b": [4, 28], "104": 4, "30": [4, 9, 12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39, 40], "rais": 4, "note": [4, 10, 12, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "re": [4, 18, 21, 23, 24, 26, 28, 29, 36, 38, 39, 40], "anaconda": 4, "maco": [4, 7, 11, 12, 13], "probabl": [4, 9, 20, 26, 28, 38, 39, 40], "variabl": [4, 9, 12, 13, 18, 21, 23, 24, 26, 28, 29, 38, 39, 40], "export": [4, 8, 9, 18, 20, 21, 23, 24, 27, 31, 32, 34], "dyld_library_path": 4, "conda_prefix": 4, "first": [4, 9, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "try": [4, 5, 7, 24, 26, 28, 29, 38, 39, 40], "find": [4, 5, 6, 7, 9, 10, 12, 13, 16, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "where": [4, 39], "locat": [4, 12], "libpython": 4, "abl": 4, "insid": [4, 16], "codna_prefix": 4, "ld_library_path": 4, "also": [5, 6, 9, 10, 11, 12, 13, 14, 16, 18, 20, 21, 23, 26, 28, 29, 34, 36, 38, 39, 40], "within": [5, 7, 12, 13], "anyth": [5, 7], "space": [5, 8], "youtub": [5, 8, 23, 24, 26, 27, 28, 29, 38, 39, 40], "video": [5, 8, 23, 24, 26, 27, 28, 29, 38, 39, 40], "upload": [6, 7, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "huggingfac": [6, 8, 10, 12, 13, 14, 18, 20, 21, 23, 27, 28, 29, 31, 32, 34, 38], "co": [6, 7, 10, 12, 13, 14, 18, 19, 20, 21, 23, 27, 28, 29, 31, 32, 34, 38], "visit": [6, 7, 26, 28, 29, 38, 39, 40], "link": [6, 9, 10, 11, 26, 28, 29, 38, 39, 40], "search": [6, 7], "specif": [6, 14, 20], "aishel": [6, 8, 18, 20, 21, 22, 41], "gigaspeech": [6, 15, 38], "wenetspeech": [6, 15], "integr": 7, "framework": [7, 26, 39], "sherpa": [7, 11, 15, 16, 17, 38], "need": [7, 9, 10, 11, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "window": [7, 11, 12, 13], "even": [7, 9, 13], "ipad": 7, "phone": 7, "start": [7, 9, 10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "address": [7, 10, 12, 13, 20, 26, 29, 38, 39, 40], "recognit": [7, 8, 11, 12, 13, 19, 20, 34, 41], "screenshot": [7, 18, 20, 21, 23, 24, 26, 34, 38, 39], "select": [7, 12, 13, 26, 27, 31, 32, 34, 38, 39, 40], "languag": [7, 18, 20, 21], "current": [7, 9, 12, 13, 20, 24, 36, 38, 39, 40, 41], "chines": [7, 19, 20], "english": [7, 34, 38], "target": 7, "method": [7, 9, 10, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 38, 39, 40], "greedi": 7, "modified_beam_search": [7, 20, 24, 26, 28, 38, 39, 40], "choos": [7, 9, 24, 26, 28, 29, 38, 39, 40], "number": [7, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "activ": 7, "path": [7, 10, 12, 13, 16, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "either": [7, 18, 20, 21, 23, 39, 40], "record": [7, 13, 18, 19, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "click": [7, 9, 18, 20, 21, 23, 26, 28, 29, 34, 38, 39], "button": 7, "submit": 7, "wait": 7, "moment": 7, "an": [7, 9, 10, 12, 13, 14, 15, 16, 18, 19, 20, 23, 24, 26, 29, 34, 38, 39, 40], "bottom": [7, 26, 28, 29, 38, 39, 40], "part": [7, 9, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "tabl": [7, 12, 13], "one": [7, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "subscrib": [7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "channel": [7, 9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "nadira": [7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "povei": [7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "www": [7, 9, 19, 23, 24, 26, 27, 28, 29, 38, 39, 40], "uc_vaumpkminz1pnkfxan9mw": [7, 9, 23, 24, 26, 27, 28, 29, 38, 39, 40], "2": [8, 10, 11, 17, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "frequent": 8, "ask": 8, "question": 8, "faq": 8, "oserror": 8, "libtorch_hip": 8, "cannot": [8, 12, 13], "share": [8, 9], "object": [8, 9, 18, 20, 21, 26, 34, 38, 39], "attributeerror": 8, "modul": [8, 9, 12, 28, 39], "distutil": 8, "attribut": [8, 23], "importerror": 8, "libpython3": 8, "No": [8, 12, 13, 34], "state_dict": [8, 17, 18, 20, 21, 23, 27, 31, 32, 34], "jit": [8, 11, 17, 23], "trace": [8, 11, 15, 17], "onnx": [8, 10, 17], "ncnn": [8, 17], "non": [8, 23, 36, 39, 41], "stream": [8, 12, 13, 14, 18, 23, 31, 32, 38, 41], "timit": [8, 22, 31, 32, 41], "introduct": [8, 35, 41], "contribut": 8, "depend": [9, 18, 23], "step": [9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39, 40], "order": [9, 12, 13, 18, 21, 23, 27, 31, 32], "matter": [9, 12], "least": 9, "v1": [9, 18, 21, 23, 27, 31, 32], "9": [9, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 34, 38, 39, 40], "alreadi": [9, 10], "don": [9, 12, 13, 15, 18, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "t": [9, 12, 13, 14, 15, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "replac": [9, 12, 13], "compil": [9, 12, 13, 18, 20, 23], "against": 9, "strongli": 9, "pythonpath": [9, 12, 13], "point": [9, 10, 18, 21, 23, 24, 26, 28, 29, 38, 39, 40], "folder": [9, 10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "tmp": [9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "setup": [9, 12, 18, 20, 21, 23, 24, 26, 27, 31, 32, 34, 39, 40], "put": [9, 12, 13, 28, 39], "sever": [9, 10, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "same": [9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "switch": [9, 18, 23, 29], "just": [9, 12, 13, 36], "about": [9, 12, 13, 20, 24, 26, 29, 38, 39, 40], "virtualenv": 9, "cpython3": 9, "final": [9, 10, 12, 13, 23, 27], "64": [9, 10, 12, 20, 39], "1540m": 9, "creator": 9, "cpython3posix": 9, "dest": 9, "ceph": [9, 10, 18, 20, 23], "fj": [9, 10, 12, 13, 20, 23], "fangjun": [9, 10, 12, 13, 20, 23], "clear": 9, "fals": [9, 10, 12, 13, 18, 20, 23, 24], "no_vcs_ignor": 9, "global": 9, "seeder": 9, "fromappdata": 9, "bundl": 9, "wheel": [9, 12], "via": [9, 11, 15, 16, 17], "copi": [9, 36], "app_data_dir": 9, "root": [9, 12, 13], "v": [9, 12, 13, 23, 31, 32], "irtualenv": 9, "ad": [9, 12, 13, 18, 20, 21, 23, 26, 28, 29, 34, 36, 38, 39, 40], "seed": 9, "21": [9, 10, 12, 18, 20, 23, 31, 32], "57": [9, 13, 23, 27], "36": [9, 12, 20, 23, 24], "bashactiv": 9, "cshellactiv": 9, "fishactiv": 9, "powershellactiv": 9, "pythonactiv": 9, "xonshactiv": 9, "sourc": [9, 10, 12, 13, 18, 19, 20, 23], "bin": [9, 12, 13, 18, 23], "dev20210822": 9, "cpu": [9, 10, 12, 13, 15, 18, 26, 28, 29, 34, 39, 40], "nightli": 9, "2bcpu": 9, "cp38": 9, "linux_x86_64": 9, "mb": [9, 12, 13], "________________________________": 9, "185": [9, 18, 23, 34], "kb": [9, 12, 13, 31, 32], "graphviz": 9, "17": [9, 10, 12, 13, 18, 23, 31, 32, 38], "none": [9, 18, 23], "18": [9, 12, 13, 18, 20, 21, 23, 26, 27, 31, 32, 38, 39, 40], "cach": 9, "manylinux1_x86_64": 9, "831": [9, 20, 32], "type": [9, 10, 12, 13, 18, 20, 23, 26, 28, 29, 34, 36, 38, 39, 40], "extens": 9, "typing_extens": 9, "26": [9, 12, 13, 20, 23, 32], "successfulli": [9, 12, 13], "req": 9, "7b1b76ge": 9, "q": 9, "audioread": 9, "soundfil": 9, "post1": 9, "py2": 9, "7": [9, 10, 11, 17, 18, 21, 23, 26, 27, 31, 32, 38, 39], "97": [9, 12, 18], "cytoolz": 9, "manylinux_2_17_x86_64": 9, "manylinux2014_x86_64": 9, "dataclass": 9, "h5py": 9, "manylinux_2_12_x86_64": 9, "manylinux2010_x86_64": 9, "684": [9, 18, 34], "intervaltre": 9, "lilcom": 9, "numpi": 9, "15": [9, 10, 12, 13, 20, 21, 23, 31, 34], "40": [9, 12, 13, 21, 23, 27, 31, 32], "pyyaml": 9, "662": 9, "tqdm": 9, "62": [9, 23, 27], "76": [9, 34], "73": 9, "satisfi": 9, "2a1410b": 9, "clean": [9, 18, 20, 23, 24, 26, 27, 28, 29, 38, 39, 40], "toolz": 9, "55": [9, 12, 21, 23, 31], "sortedcontain": 9, "29": [9, 14, 18, 20, 21, 23, 27, 28, 31, 32], "cffi": 9, "411": [9, 23], "pycpars": 9, "20": [9, 10, 12, 18, 20, 21, 23, 26, 27, 31, 32, 34, 39], "112": [9, 12, 13], "pypars": 9, "67": 9, "done": [9, 10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "filenam": [9, 12, 13, 14, 15, 16, 28, 29, 38, 40], "dev_2a1410b_clean": 9, "size": [9, 10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "342242": 9, "sha256": 9, "f683444afa4dc0881133206b4646a": 9, "9d0f774224cc84000f55d0a67f6e4a37997": 9, "store": [9, 23], "ephem": 9, "ftu0qysz": 9, "7f": 9, "7a": 9, "8e": 9, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 9, "warn": 9, "built": 9, "invalid": [9, 23], "metadata": [9, 31, 32], "mandat": 9, "pep": 9, "440": 9, "packa": 9, "ging": 9, "deprec": [9, 20], "legaci": 9, "becaus": 9, "could": [9, 12, 13, 18, 21], "A": [9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 38, 39, 40], "discuss": 9, "regard": 9, "pypa": 9, "sue": 9, "8368": 9, "inter": 9, "valtre": 9, "sor": 9, "tedcontain": 9, "remot": 9, "enumer": 9, "500": [9, 10, 12, 13, 20, 23, 29, 38], "count": 9, "100": [9, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "compress": 9, "308": [9, 18, 20, 21], "total": [9, 13, 18, 20, 21, 23, 24, 26, 27, 34, 38, 39], "delta": 9, "263": [9, 13], "reus": 9, "307": 9, "102": [9, 18], "pack": [9, 39, 40], "receiv": 9, "172": 9, "49": [9, 12, 13, 23, 32, 34], "kib": 9, "385": 9, "00": [9, 12, 18, 20, 21, 23, 27, 31, 32, 34], "resolv": 9, "kaldilm": 9, "tar": 9, "gz": 9, "48": [9, 12, 13, 18, 20], "574": 9, "kaldialign": 9, "sentencepiec": [9, 23], "96": 9, "41": [9, 12, 18, 20, 31, 34], "absl": 9, "absl_pi": 9, "132": 9, "googl": [9, 26, 28, 29, 38, 39, 40], "auth": 9, "oauthlib": 9, "google_auth_oauthlib": 9, "grpcio": 9, "ment": 9, "12": [9, 10, 12, 13, 14, 18, 20, 21, 23, 26, 28, 29, 31, 34, 38, 39, 40], "requi": 9, "rement": 9, "protobuf": 9, "manylinux_2_5_x86_64": 9, "werkzeug": 9, "288": 9, "tensorboard_data_serv": 9, "google_auth": 9, "35": [9, 10, 12, 13, 20, 23, 38], "152": 9, "request": [9, 36], "plugin": 9, "wit": 9, "tensorboard_plugin_wit": 9, "781": 9, "markdown": 9, "six": 9, "16": [9, 10, 12, 13, 16, 18, 20, 21, 23, 26, 27, 31, 32, 34, 38, 39, 40], "cachetool": 9, "rsa": 9, "pyasn1": 9, "pyasn1_modul": 9, "155": 9, "requests_oauthlib": 9, "77": [9, 23], "urllib3": 9, "27": [9, 12, 13, 18, 20, 27, 32], "138": [9, 18, 20], "certifi": 9, "2017": 9, "2021": [9, 18, 21, 23, 27, 31, 32, 34], "145": 9, "charset": 9, "normal": [9, 27, 31, 32, 34, 39], "charset_norm": 9, "idna": 9, "59": [9, 12, 21, 23], "146": 9, "897233": 9, "eccb906cafcd45bf9a7e1a1718e4534254bfb": 9, "f4c0d0cbc66eee6c88d68a63862": 9, "85": 9, "7d": 9, "63": [9, 20], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 9, "etool": 9, "oaut": 9, "hlib": 9, "let": [9, 12, 13, 18, 23], "u": [9, 12, 13, 18, 20, 21, 23, 24, 34], "08": [9, 23, 27, 29, 31, 32, 34, 38], "19": [9, 10, 12, 13, 18, 23, 27, 31, 32], "main": [9, 18, 23, 36], "dl_dir": [9, 18, 21, 23, 24, 26, 28, 29, 38, 39, 40], "waves_yesno": 9, "49mb": 9, "03": [9, 10, 13, 20, 23, 31, 32, 38], "39mb": 9, "manifest": [9, 24], "31": [9, 12, 13, 23], "42": [9, 13, 18, 23, 34], "comput": [9, 10, 12, 13, 18, 20, 21, 24, 26, 27, 29, 31, 32, 34, 38, 39, 40], "fbank": [9, 10, 12, 13, 18, 20, 21, 23, 27, 31, 32, 34], "32": [9, 12, 13, 14, 18, 20, 21, 40], "803": 9, "info": [9, 10, 12, 13, 18, 20, 21, 23, 27, 31, 32, 34], "compute_fbank_yesno": 9, "52": [9, 18, 23], "process": [9, 10, 12, 13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "extract": [9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "featur": [9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "_______________________________________________________________": 9, "90": [9, 12], "01": [9, 12, 20, 21, 23, 24, 28], "80": [9, 10, 12, 13, 18, 20, 23], "57it": 9, "085": 9, "______________________________________________________________": 9, "248": [9, 20], "21it": 9, "lang": [9, 10, 20, 23, 29], "fcordre9": 9, "kaldilm_6899d26f2d684ad48f21025950cd2866": 9, "csrc": [9, 23], "arpa_file_pars": 9, "cc": 9, "void": 9, "arpafilepars": 9, "rea": 9, "d": [9, 31, 32], "std": 9, "istream": 9, "79": 9, "140": [9, 21], "gram": [9, 18, 20, 21, 26, 27, 29, 31, 32, 39, 40], "89": [9, 18], "hlg": [9, 27, 31, 32, 34], "928": 9, "compile_hlg": 9, "120": 9, "lang_phon": [9, 21, 27, 31, 32, 34], "929": [9, 20], "lexicon": [9, 18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39, 40], "116": 9, "convert": [9, 12, 13, 23], "l": [9, 12, 13, 20, 31, 32, 34], "pt": [9, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "linv": [9, 20, 23, 34], "931": 9, "ctc_topo": 9, "max_token_id": 9, "932": 9, "load": [9, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "fst": [9, 20, 34], "intersect": [9, 26, 39, 40], "933": 9, "lg": [9, 26, 29, 39, 40], "shape": 9, "66": [9, 13], "connect": [9, 10, 23, 26, 27, 38, 39, 40], "68": [9, 23], "70": 9, "class": [9, 23], "tensor": [9, 13, 18, 20, 21, 23, 26, 34, 38, 39], "71": [9, 23, 27], "determin": 9, "934": 9, "74": [9, 10], "raggedint": 9, "remov": [9, 18, 20, 21, 23, 27, 31, 32], "disambigu": 9, "symbol": [9, 20, 26, 39, 40], "87": [9, 12], "remove_epsilon": 9, "935": 9, "92": [9, 23], "arc": 9, "95": [9, 19], "compos": 9, "h": 9, "105": [9, 23], "936": 9, "107": [9, 13, 27], "123": 9, "now": [9, 12, 13, 18, 23, 24, 26, 27, 28, 29, 31, 32, 38, 39, 40], "cuda_visible_devic": [9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "gpu": [9, 12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 31, 32, 34, 38, 39, 40], "avail": [9, 10, 12, 13, 18, 20, 23, 27, 31, 32, 34, 38], "case": [9, 10, 12, 13, 26, 28, 29, 38, 39, 40], "segment": 9, "fault": 9, "core": 9, "dump": 9, "protocol_buffers_python_implement": 9, "more": [9, 12, 13, 18, 23, 24, 34, 36, 38, 39], "674": 9, "interest": [9, 24, 26, 28, 29, 38, 39, 40], "given": [9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 39, 40], "below": [9, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39], "072": 9, "465": 9, "466": 9, "exp_dir": [9, 12, 13, 20, 23, 24, 26, 28, 29, 39, 40], "posixpath": [9, 12, 13, 20, 23], "exp": [9, 10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "lang_dir": [9, 20, 23], "lr": [9, 20, 38], "feature_dim": [9, 10, 12, 13, 18, 20, 23, 34], "weight_decai": 9, "1e": 9, "06": [9, 10, 12, 21, 23, 27, 34], "start_epoch": 9, "best_train_loss": [9, 10, 12, 13], "inf": [9, 10, 12, 13], "best_valid_loss": [9, 10, 12, 13], "best_train_epoch": [9, 10, 12, 13], "best_valid_epoch": [9, 10, 13], "batch_idx_train": [9, 10, 12, 13], "log_interv": [9, 10, 12, 13], "valid_interv": [9, 10, 12, 13], "beam_siz": [9, 10, 20], "reduct": [9, 12, 13, 28], "sum": 9, "use_doub": 9, "le_scor": 9, "true": [9, 10, 12, 13, 18, 20, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "world_siz": [9, 24], "master_port": 9, "12354": 9, "num_epoch": 9, "feature_dir": [9, 23], "max_dur": [9, 23], "bucketing_sampl": [9, 23], "num_bucket": [9, 23], "concatenate_cut": [9, 23], "duration_factor": [9, 23], "gap": [9, 23], "on_the_fly_feat": [9, 23], "shuffl": [9, 23], "return_cut": [9, 23], "num_work": [9, 23], "074": 9, "113": [9, 20, 23], "098": [9, 27], "cut": [9, 23], "240": [9, 18, 34], "149": [9, 12, 23], "200": [9, 10, 12, 13, 18, 23, 24, 31, 32, 34], "singlecutsampl": 9, "206": [9, 23], "219": [9, 20, 23], "246": [9, 20, 23, 31, 32], "357": 9, "416": 9, "epoch": [9, 10, 12, 13, 14, 15, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "batch": [9, 12, 13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "avg": [9, 10, 12, 13, 14, 15, 16, 20, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "loss": [9, 12, 13, 18, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "0789": 9, "848": 9, "5356": 9, "7556": 9, "301": [9, 10, 23], "432": [9, 23], "9972": 9, "best": [9, 12, 13, 18, 21, 23], "805": 9, "2436": 9, "5717": 9, "33": [9, 12, 13, 18, 19, 20, 23, 31], "109": [9, 18, 23], "4167": 9, "121": [9, 27], "325": [9, 13], "2214": 9, "798": [9, 20], "0781": 9, "1343": 9, "065": 9, "0859": 9, "556": 9, "0421": 9, "0975": 9, "810": 9, "0431": 9, "824": 9, "657": 9, "0109": 9, "984": [9, 23], "0093": 9, "0096": 9, "50": [9, 10, 12, 13, 23, 26, 31, 38, 39, 40], "239": [9, 13, 20], "0104": 9, "0101": 9, "569": 9, "0092": 9, "819": [9, 31], "835": 9, "51": [9, 12, 18, 23, 34], "024": 9, "0105": 9, "317": [9, 13], "0099": 9, "0097": 9, "552": 9, "0108": 9, "869": 9, "0102": 9, "126": [9, 23], "128": [9, 23], "537": [9, 23], "192": [9, 23], "249": [9, 13], "250": [9, 20, 27], "lm_dir": [9, 23], "search_beam": [9, 18, 23, 34], "output_beam": [9, 18, 23, 34], "min_active_st": [9, 18, 23, 34], "max_active_st": [9, 18, 23, 34], "10000": [9, 18, 23, 34], "use_double_scor": [9, 18, 23, 34], "193": 9, "213": [9, 34], "259": [9, 13, 18], "devic": [9, 10, 12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 39, 40], "217": [9, 18, 23], "279": [9, 23], "averag": [9, 10, 12, 13, 14, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "userwarn": [9, 20], "floor_divid": 9, "futur": [9, 13, 20, 41], "round": [9, 20], "toward": [9, 20], "trunc": [9, 20], "function": [9, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "NOT": [9, 18, 20, 23, 34], "floor": [9, 20], "incorrect": [9, 13, 20], "neg": [9, 20], "valu": [9, 12, 13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "keep": [9, 20, 26, 39, 40], "behavior": [9, 20], "div": [9, 20], "b": [9, 20, 23, 31, 32], "rounding_mod": [9, 20], "actual": [9, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "divis": [9, 20], "trigger": 9, "intern": 9, "aten": 9, "src": [9, 12], "nativ": 9, "binaryop": 9, "cpp": [9, 12, 15], "450": [9, 18, 20, 21], "k": [9, 26, 31, 32, 38, 39, 40], "n": [9, 18, 24, 26, 28, 29, 31, 32, 38, 39, 40], "220": [9, 12, 20, 21, 23], "409": 9, "190": [9, 27], "until": [9, 23, 28], "571": [9, 23], "228": [9, 23], "transcript": [9, 18, 19, 20, 21, 23, 26, 27, 31, 32, 38, 39, 40], "recog": [9, 20, 23], "test_set": [9, 34], "572": 9, "ins": [9, 23, 34], "del": [9, 23, 34], "sub": [9, 23, 34], "573": 9, "236": 9, "wrote": [9, 23], "detail": [9, 11, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "stat": [9, 23], "err": [9, 20, 23], "299": 9, "congratul": [9, 12, 13, 18, 21, 23, 27, 31, 32, 34], "fun": [9, 12, 13], "debug": 9, "variou": [9, 17, 41], "problem": [9, 24], "mai": [9, 12, 13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40, 41], "period": [10, 12], "disk": 10, "optim": [10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "other": [10, 13, 14, 20, 23, 24, 26, 27, 31, 32, 34, 36, 39, 40, 41], "relat": [10, 18, 20, 23, 27, 31, 32, 34], "resum": [10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "howev": [10, 13, 24], "onli": [10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40, 41], "strip": 10, "reduc": [10, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "each": [10, 12, 13, 14, 18, 20, 21, 23, 26, 28, 29, 36, 38, 39, 40], "well": [10, 34, 41], "usag": [10, 12, 13, 15, 16, 27, 31, 32, 34], "pruned_transducer_stateless3": [10, 15, 36], "almost": [10, 26, 36, 39, 40], "dir": [10, 12, 13, 14, 15, 16, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "bpe": [10, 12, 13, 14, 15, 16, 23, 26, 28, 29, 38, 39, 40], "lang_bpe_500": [10, 12, 13, 14, 15, 16, 23, 26, 28, 29, 38, 39, 40], "dict": 10, "host": 10, "csukuangfj": [10, 12, 13, 14, 18, 20, 21, 23, 27, 31, 32, 34, 38], "prune": [10, 14, 20, 22, 24, 25, 35, 36, 37, 38, 40], "transduc": [10, 11, 14, 17, 19, 22, 24, 25, 35, 36, 37], "stateless3": [10, 12], "2022": [10, 12, 13, 14, 20, 26, 28, 29, 38, 39], "05": [10, 12, 13, 18, 20, 21, 23, 32], "lf": [10, 12, 13, 14, 18, 20, 21, 23, 27, 29, 31, 32, 34], "repo": [10, 14], "prefix": 10, "those": 10, "wave": [10, 12, 13, 18, 23], "iter": [10, 12, 13, 16, 26, 28, 29, 38, 39, 40], "1224000": 10, "greedy_search": [10, 20, 26, 28, 38, 39, 40], "test_wav": [10, 12, 13, 14, 18, 20, 21, 23, 27, 31, 32, 34], "1089": [10, 12, 13, 14, 23, 27], "134686": [10, 12, 13, 14, 23, 27], "0001": [10, 12, 13, 14, 23, 27], "wav": [10, 12, 13, 14, 16, 18, 20, 21, 23, 26, 28, 29, 31, 32, 34, 38, 39, 40], "1221": [10, 12, 13, 23, 27], "135766": [10, 12, 13, 23, 27], "0002": [10, 12, 13, 23, 27], "multipl": [10, 18, 20, 21, 23, 27, 31, 32, 34], "sound": [10, 12, 13, 16, 17, 18, 20, 21, 23, 27, 31, 32, 34], "Its": [10, 12, 13, 23], "output": [10, 12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "09": [10, 13, 18, 20, 21, 23, 38], "02": [10, 12, 13, 20, 23, 26, 32, 38, 39], "233": [10, 12, 13], "265": 10, "reset_interv": [10, 12, 13], "3000": [10, 12, 13], "subsampling_factor": [10, 13, 18, 20, 23], "encoder_dim": [10, 12, 13], "512": [10, 12, 13, 18, 20, 23], "nhead": [10, 12, 18, 20, 23, 26, 39], "dim_feedforward": [10, 12, 13, 20], "2048": [10, 12, 13, 20], "num_encoder_lay": [10, 12, 13, 20], "decoder_dim": [10, 12, 13], "joiner_dim": [10, 12, 13], "model_warm_step": [10, 12, 13], "env_info": [10, 12, 13, 18, 20, 23], "releas": [10, 12, 13, 18, 20, 23], "sha1": [10, 12, 13, 18, 20, 23], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 10, "date": [10, 12, 13, 18, 20, 23], "fri": 10, "oct": [10, 23], "miss": [10, 12, 13, 20, 23], "cu102": [10, 12, 13], "branch": [10, 12, 13, 18, 20, 23, 28], "1013": 10, "c39cba5": 10, "dirti": [10, 12, 13, 18, 23], "thu": [10, 12, 13, 20, 23, 27], "jsonl": 10, "hostnam": [10, 12, 13, 20], "de": [10, 12, 13, 20], "74279": [10, 12, 13, 20], "0324160024": 10, "65bfd8b584": 10, "jjlbn": 10, "ip": [10, 12, 13, 20], "177": [10, 13, 20, 21, 23], "203": [10, 23], "bpe_model": [10, 12, 13, 23], "sound_fil": [10, 18, 20, 23, 34], "sample_r": [10, 18, 20, 23, 34], "16000": [10, 18, 20, 21, 23, 27, 28, 31, 32], "beam": [10, 38], "max_context": 10, "max_stat": 10, "context_s": [10, 12, 13, 20], "max_sym_per_fram": [10, 20], "simulate_stream": 10, "decode_chunk_s": 10, "left_context": 10, "dynamic_chunk_train": 10, "causal_convolut": 10, "short_chunk_s": [10, 39, 40], "25": [10, 12, 13, 18, 23, 26, 31, 32, 34, 39], "num_left_chunk": 10, "blank_id": [10, 12, 13, 20], "unk_id": 10, "vocab_s": [10, 12, 13, 20], "271": [10, 13], "273": [10, 20], "612": 10, "458": 10, "disabl": [10, 12, 13], "giga": [10, 13, 38], "623": 10, "277": 10, "paramet": [10, 12, 13, 15, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 38, 39, 40], "78648040": 10, "951": [10, 23], "285": [10, 20, 23], "construct": [10, 12, 13, 18, 20, 21, 23, 27, 31, 32, 34], "952": 10, "295": [10, 18, 20, 21, 23], "957": 10, "700": 10, "329": [10, 13, 23], "912": 10, "388": 10, "earli": [10, 12, 13, 23, 27], "nightfal": [10, 12, 13, 23, 27], "THE": [10, 12, 13, 23, 27], "yellow": [10, 12, 13, 23, 27], "lamp": [10, 12, 13, 23, 27], "light": [10, 12, 13, 23, 27], "here": [10, 12, 13, 18, 20, 21, 23, 24, 27, 36, 39], "AND": [10, 12, 13, 23, 27], "THERE": [10, 12, 13, 23, 27], "squalid": [10, 12, 13, 23, 27], "quarter": [10, 12, 13, 23, 27], "OF": [10, 12, 13, 23, 27], "brothel": [10, 12, 13, 23, 27], "god": [10, 23, 27], "AS": [10, 23, 27], "direct": [10, 23, 27], "consequ": [10, 23, 27], "sin": [10, 23, 27], "man": [10, 23, 27], "punish": [10, 23, 27], "had": [10, 23, 27], "her": [10, 23, 27], "love": [10, 23, 27], "child": [10, 23, 27], "whose": [10, 20, 23, 27], "ON": [10, 12, 23, 27], "THAT": [10, 23, 27], "dishonor": [10, 23, 27], "bosom": [10, 23, 27], "TO": [10, 23, 27], "parent": [10, 23, 27], "forev": [10, 23, 27], "WITH": [10, 23, 27], "race": [10, 23, 27], "descent": [10, 23, 27], "mortal": [10, 23, 27], "BE": [10, 23, 27], "bless": [10, 23, 27], "soul": [10, 23, 27], "IN": [10, 23, 27], "heaven": [10, 23, 27], "yet": [10, 12, 13, 23, 27], "THESE": [10, 23, 27], "thought": [10, 23, 27], "affect": [10, 23, 27], "hester": [10, 23, 27], "prynn": [10, 23, 27], "less": [10, 23, 27, 34, 39, 40], "hope": [10, 19, 23, 27], "than": [10, 13, 18, 20, 21, 23, 26, 27, 28, 29, 34, 38, 39, 40], "apprehens": [10, 23, 27], "390": 10, "alwai": 10, "down": [10, 18, 23, 26, 28, 29, 38, 39, 40], "reproduc": [10, 23], "ln": [10, 12, 13, 14, 18, 23, 26, 28, 29, 38, 39, 40], "9999": [10, 28, 29, 38], "symlink": 10, "pass": [10, 18, 20, 21, 23, 26, 28, 29, 36, 38, 39, 40], "max": [10, 12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "durat": [10, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "600": [10, 23, 26, 28, 38, 39, 40], "reason": [10, 12, 13, 39], "support": [11, 12, 13, 18, 20, 23, 26, 28, 29, 36, 38, 39, 40], "zipform": [11, 14, 22, 25, 35, 37], "convemform": [11, 17, 36], "perform": [11, 20, 24, 39], "platform": 11, "android": [11, 12, 13], "raspberri": 11, "pi": 11, "\u7231\u82af\u6d3e": 11, "maix": 11, "iii": 11, "axera": 11, "static": 11, "produc": [11, 26, 28, 29, 38, 39, 40], "binari": [11, 12, 13, 18, 20, 21, 23, 26, 34, 38, 39], "everyth": 11, "pnnx": [11, 17], "torchscript": [11, 15, 16, 17], "encod": [11, 14, 16, 17, 18, 20, 21, 23, 26, 27, 28, 34, 36, 38, 39, 40], "option": [11, 14, 17, 20, 24, 27, 31, 32, 34], "int8": [11, 17], "quantiz": [11, 17, 24], "zengwei": [12, 14, 29, 38], "conv": [12, 13], "emform": [12, 13, 15], "stateless2": [12, 13, 38], "07": [12, 13, 18, 20, 21, 23], "ubuntu": [12, 13], "04": [12, 13, 18, 20, 21, 23, 27, 31, 32], "work": [12, 13, 23], "pretrained_model": [12, 13], "online_transduc": 12, "continu": [12, 13, 18, 20, 21, 23, 26, 28, 29, 34, 38, 39], "git_lfs_skip_smudg": [12, 13, 14], "includ": [12, 13, 14, 26, 28, 29, 38, 39, 40], "jit_xxx": [12, 13], "anywher": [12, 13], "submodul": 12, "updat": [12, 13], "recurs": 12, "init": 12, "cmake": [12, 13, 18, 23], "dcmake_build_typ": [12, 18, 23], "dncnn_python": 12, "dncnn_build_benchmark": 12, "off": 12, "dncnn_build_exampl": 12, "dncnn_build_tool": 12, "j4": 12, "pwd": 12, "compon": [12, 36], "execut": [12, 18, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "ncnn2int8": [12, 13], "our": [12, 13, 15, 16, 23, 24, 26, 36, 39, 40], "cpython": 12, "38": [12, 18, 20, 23, 31], "gnu": 12, "am": 12, "sai": [12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "But": [12, 26, 28, 29, 38, 39, 40], "doe": [12, 13, 18, 20, 23, 34], "As": [12, 20, 23, 24], "long": 12, "later": [12, 13, 18, 21, 23, 26, 27, 28, 29, 31, 32, 38, 39, 40], "termin": 12, "tencent": [12, 13], "made": 12, "modif": [12, 20], "offic": 12, "synchron": 12, "offici": 12, "renam": [12, 13], "conv_emformer_transducer_stateless2": [12, 36], "num": [12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "layer": [12, 13, 20, 24, 26, 36, 38, 39, 40], "chunk": [12, 14, 39, 40], "length": [12, 20, 39, 40], "cnn": 12, "kernel": [12, 20], "left": [12, 20, 39, 40], "context": [12, 20, 26, 36, 38, 39, 40], "right": [12, 20, 36, 39], "memori": [12, 18, 20, 23, 36], "dim": [12, 13, 20, 26, 39], "yourself": [12, 13, 24, 39, 40], "tune": [12, 13, 18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "combin": [12, 13], "2023": [12, 13, 28], "677": 12, "681": 12, "229": [12, 18], "best_v": 12, "alid_epoch": 12, "subsampl": [12, 39, 40], "ing_factor": 12, "a34171ed85605b0926eebbd0463d059431f4f74a": 12, "wed": [12, 18, 20, 23], "dec": 12, "ver": 12, "ion": 12, "530e8a1": 12, "tue": [12, 23], "star": [12, 13], "op": 12, "1220120619": [12, 13], "7695ff496b": [12, 13], "s9n4w": [12, 13], "127": [12, 13, 34], "icefa": 12, "ll": 12, "transdu": 12, "cer": 12, "use_averaged_model": [12, 13], "cnn_module_kernel": 12, "left_context_length": 12, "chunk_length": 12, "right_context_length": 12, "memory_s": 12, "231": [12, 13], "053": 12, "022": 12, "708": [12, 18, 20, 23, 34], "315": [12, 18, 20, 21, 23, 27], "75490012": 12, "318": [12, 13], "320": [12, 20], "682": 12, "75": 12, "lh": [12, 13], "rw": [12, 13], "kuangfangjun": [12, 13], "289m": 12, "jan": [12, 13], "289": 12, "roughli": [12, 13], "equal": [12, 13, 39, 40], "1024": [12, 13, 38], "287": [12, 34], "1010k": [12, 13], "decoder_jit_trac": [12, 13, 16, 38, 40], "283m": 12, "encoder_jit_trac": [12, 13, 16, 38, 40], "0m": [12, 13], "joiner_jit_trac": [12, 13, 16, 38, 40], "sure": [12, 13], "found": [12, 13, 18, 20, 21, 23, 26, 28, 29, 34, 38, 39], "param": [12, 13], "503k": [12, 13], "437": [12, 13], "142m": 12, "79k": 12, "5m": [12, 13], "488": [12, 13], "text": [12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "architectur": [12, 13, 38], "editor": [12, 13], "content": [12, 13], "compar": [12, 13, 39], "283": 12, "1010": [12, 13], "142": [12, 18, 21, 23], "503": [12, 13], "convers": [12, 13], "half": [12, 13, 26, 39, 40], "joiner": [12, 13, 14, 16, 20, 26, 38, 39, 40], "default": [12, 13, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "float32": [12, 13], "float16": [12, 13], "occupi": [12, 13], "byte": [12, 13], "twice": [12, 13], "smaller": [12, 13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "fp16": [12, 13, 26, 28, 29, 38, 39, 40], "won": [12, 13, 14, 18, 21, 23, 24, 26, 28, 29, 38, 39, 40], "token": [12, 13, 14, 18, 20, 21, 23, 27, 31, 32, 34], "accept": [12, 13], "216": [12, 18, 23, 31, 32], "encoder_param_filenam": [12, 13], "encoder_bin_filenam": [12, 13], "decoder_param_filenam": [12, 13], "decoder_bin_filenam": [12, 13], "joiner_param_filenam": [12, 13], "joiner_bin_filenam": [12, 13], "sound_filenam": [12, 13], "141": 12, "328": 12, "151": 12, "331": [12, 13, 23, 27], "176": [12, 20, 23], "336": 12, "106000": [12, 13, 23, 27], "581": [12, 27], "381": 12, "few": [12, 13, 24], "7767517": [12, 13], "1060": 12, "1342": 12, "in0": [12, 13], "explan": [12, 13], "three": [12, 13, 16, 18, 20, 36], "magic": [12, 13], "intermedi": [12, 13], "mean": [12, 13, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 36, 38, 39, 40], "extra": [12, 13, 20, 36, 39], "increment": [12, 13], "1061": 12, "sherpametadata": [12, 13], "sherpa_meta_data1": [12, 13], "still": [12, 13], "sinc": [12, 13, 24, 34, 38], "newli": [12, 13], "must": [12, 13, 39], "kei": [12, 13, 23], "eas": [12, 13], "list": [12, 13, 18, 20, 21, 23, 27, 31, 32], "pair": [12, 13], "accordingli": [12, 13], "sad": [12, 13], "rememb": [12, 13], "anymor": [12, 13], "flexibl": [12, 13], "edit": [12, 13], "arm": [12, 13], "aarch64": [12, 13], "onc": [12, 13], "mayb": [12, 13], "year": [12, 13], "_jit_trac": [12, 13], "56": [12, 13, 23, 31], "fp32": [12, 13], "doubl": [12, 13], "j": [12, 13, 18, 23], "scale": [12, 13, 18, 23, 24, 27, 29, 31, 32], "py38": [12, 13], "arg": [12, 13], "wave_filenam": [12, 13], "16k": [12, 13], "hz": [12, 13, 31, 32], "mono": [12, 13], "calibr": [12, 13], "purpos": [12, 13], "cat": [12, 13], "eof": [12, 13], "calcul": [12, 13, 28, 39, 40], "has_gpu": [12, 13], "config": [12, 13], "use_vulkan_comput": [12, 13], "88": [12, 20], "conv_87": 12, "942385": [12, 13], "threshold": [12, 13, 28], "938493": 12, "968131": 12, "conv_88": 12, "442448": 12, "549335": 12, "167552": 12, "conv_89": 12, "228289": 12, "001738": 12, "871552": 12, "linear_90": 12, "976146": 12, "101789": 12, "115": [12, 13, 18, 23], "267128": 12, "linear_91": 12, "962030": 12, "162033": 12, "602713": 12, "linear_92": 12, "323041": 12, "853959": 12, "953129": 12, "linear_94": 12, "905416": 12, "648006": 12, "323545": 12, "linear_93": 12, "474093": 12, "200188": 12, "linear_95": 12, "888012": 12, "403563": 12, "483986": 12, "linear_96": 12, "856741": 12, "398679": 12, "524273": 12, "linear_97": 12, "635942": 12, "613655": 12, "590950": 12, "linear_98": 12, "460340": 12, "670146": 12, "398010": 12, "linear_99": 12, "532276": 12, "585537": 12, "119396": 12, "linear_101": 12, "585871": 12, "719224": 12, "205809": 12, "linear_100": 12, "751382": 12, "081648": 12, "linear_102": 12, "593344": 12, "450581": 12, "551147": 12, "linear_103": 12, "592681": 12, "705824": 12, "257959": 12, "linear_104": 12, "752957": 12, "980955": 12, "110489": 12, "linear_105": 12, "696240": 12, "877193": 12, "608953": 12, "linear_106": 12, "059659": 12, "643138": 12, "048950": 12, "linear_108": 12, "975461": 12, "589567": 12, "671457": 12, "linear_107": 12, "190381": 12, "515701": 12, "linear_109": 12, "710759": 12, "305635": 12, "082436": 12, "linear_110": 12, "531228": 12, "731162": 12, "159557": 12, "linear_111": 12, "528083": 12, "259322": 12, "211544": 12, "linear_112": 12, "148807": 12, "500842": 12, "087374": 12, "linear_113": 12, "592566": 12, "948851": 12, "65": 12, "166611": 12, "linear_115": 12, "437109": 12, "608947": 12, "642395": 12, "linear_114": 12, "193942": 12, "503904": 12, "linear_116": 12, "966980": 12, "200896": 12, "676392": 12, "linear_117": 12, "451303": 12, "061664": 12, "951344": 12, "linear_118": 12, "077262": 12, "965800": 12, "023804": 12, "linear_119": 12, "671615": 12, "847613": 12, "198460": 12, "linear_120": 12, "625638": 12, "131427": 12, "556595": 12, "linear_122": 12, "274080": 12, "888716": 12, "978189": 12, "linear_121": 12, "420480": 12, "429659": 12, "linear_123": 12, "826197": 12, "599617": 12, "281532": 12, "linear_124": 12, "396383": 12, "325849": 12, "335875": 12, "linear_125": 12, "337198": 12, "941410": 12, "221970": 12, "linear_126": 12, "699965": 12, "842878": 12, "224073": 12, "linear_127": 12, "775370": 12, "884215": 12, "696438": 12, "linear_129": 12, "872276": 12, "837319": 12, "254213": 12, "linear_128": 12, "180057": 12, "687883": 12, "linear_130": 12, "150427": 12, "454298": 12, "765789": 12, "linear_131": 12, "112692": 12, "924847": 12, "025545": 12, "linear_132": 12, "852893": 12, "116593": 12, "749626": 12, "linear_133": 12, "517084": 12, "024665": 12, "275314": 12, "linear_134": 12, "683807": 12, "878618": 12, "743618": 12, "linear_136": 12, "421055": 12, "322729": 12, "086264": 12, "linear_135": 12, "309880": 12, "917679": 12, "linear_137": 12, "827781": 12, "744595": 12, "915554": 12, "linear_138": 12, "422395": 12, "742882": 12, "402161": 12, "linear_139": 12, "527538": 12, "866123": 12, "849449": 12, "linear_140": 12, "128619": 12, "657793": 12, "266134": 12, "linear_141": 12, "839593": 12, "845993": 12, "021378": 12, "linear_143": 12, "442304": 12, "099039": 12, "889746": 12, "linear_142": 12, "325038": 12, "849592": 12, "linear_144": 12, "929444": 12, "618206": 12, "605080": 12, "linear_145": 12, "382126": 12, "321095": 12, "625010": 12, "linear_146": 12, "894987": 12, "867645": 12, "836517": 12, "linear_147": 12, "915313": 12, "906028": 12, "886522": 12, "linear_148": 12, "614287": 12, "908151": 12, "496181": 12, "linear_150": 12, "724932": 12, "485588": 12, "28": [12, 13, 20, 23, 27], "312899": 12, "linear_149": 12, "161146": 12, "606939": 12, "linear_151": 12, "164453": 12, "847355": 12, "719223": 12, "linear_152": 12, "086471": 12, "984121": 12, "222834": 12, "linear_153": 12, "099524": 12, "991601": 12, "816805": 12, "linear_154": 12, "054585": 12, "489706": 12, "286930": 12, "linear_155": 12, "389185": 12, "100321": 12, "963501": 12, "linear_157": 12, "982999": 12, "154796": 12, "637253": 12, "linear_156": 12, "537706": 12, "875190": 12, "linear_158": 12, "420287": 12, "502287": 12, "531588": 12, "linear_159": 12, "014746": 12, "423280": 12, "477261": 12, "linear_160": 12, "45": [12, 18, 20, 23], "633553": 12, "715335": 12, "220921": 12, "linear_161": 12, "371849": 12, "117830": 12, "815203": 12, "linear_162": 12, "492933": 12, "126283": 12, "623318": 12, "linear_164": 12, "697504": 12, "825712": 12, "317358": 12, "linear_163": 12, "078367": 12, "008038": 12, "linear_165": 12, "023975": 12, "836278": 12, "577358": 12, "linear_166": 12, "860619": 12, "259792": 12, "493614": 12, "linear_167": 12, "380934": 12, "496160": 12, "107042": 12, "linear_168": 12, "691216": 12, "733317": 12, "831076": 12, "linear_169": 12, "723948": 12, "952728": 12, "129707": 12, "linear_171": 12, "034811": 12, "366547": 12, "665123": 12, "linear_170": 12, "356277": 12, "710501": 12, "linear_172": 12, "556884": 12, "729481": 12, "166058": 12, "linear_173": 12, "033039": 12, "207264": 12, "442120": 12, "linear_174": 12, "597379": 12, "658676": 12, "47": [12, 13, 18, 23], "768131": 12, "linear_2": [12, 13], "293503": 12, "305265": 12, "877850": 12, "linear_1": [12, 13], "812222": 12, "766452": 12, "487047": 12, "linear_3": [12, 13], "999999": 12, "999755": 12, "031174": 12, "wish": [12, 13], "low": [12, 13], "accuraci": [12, 13, 19], "955k": 12, "18k": 12, "inparam": [12, 13], "inbin": [12, 13], "outparam": [12, 13], "outbin": [12, 13], "99m": 12, "78k": 12, "774k": [12, 13], "496": [12, 13, 23, 27], "99": [12, 13, 14], "774": [12, 13], "much": [12, 13], "linear": [12, 13, 20], "convolut": [12, 13, 28, 36, 39], "exact": [12, 13], "4x": [12, 13], "speed": [12, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "comparison": 12, "44": [12, 13, 23, 31, 32], "468000": [13, 16, 38], "lstm_transducer_stateless2": [13, 16, 38], "rnn": [13, 20, 26, 28, 38, 39, 40], "hidden": [13, 38], "862": 13, "222": [13, 21, 23], "865": 13, "is_pnnx": 13, "62e404dd3f3a811d73e424199b3408e309c06e1a": 13, "mon": 13, "6d7a559": 13, "feb": [13, 20], "54": [13, 23, 27, 31, 32], "147": 13, "rnn_hidden_s": 13, "aux_layer_period": 13, "235": 13, "43": [13, 23], "472": 13, "595": 13, "324": 13, "83137520": 13, "596": 13, "257024": 13, "326": 13, "781812": 13, "327": 13, "84176356": 13, "182": [13, 18, 27], "158": 13, "183": [13, 31, 32], "335": 13, "101": 13, "tracerwarn": 13, "boolean": 13, "might": [13, 39, 40], "caus": [13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "flow": 13, "treat": 13, "constant": 13, "need_pad": 13, "bool": 13, "180": [13, 18, 23], "339": 13, "304": 13, "207": [13, 21, 23], "84": [13, 18], "324m": 13, "321": [13, 18], "318m": 13, "159m": 13, "21k": 13, "159": [13, 23, 34], "37": [13, 18, 20, 23, 31], "861": 13, "255": 13, "425": [13, 23], "427": [13, 23], "266": [13, 23, 27], "431": 13, "342": 13, "343": 13, "267": [13, 20, 31, 32], "379": 13, "268": [13, 23, 27], "317m": 13, "conv_15": 13, "930708": 13, "972025": 13, "conv_16": 13, "978855": 13, "031788": 13, "456645": 13, "conv_17": 13, "868437": 13, "830528": 13, "218575": 13, "linear_18": 13, "107259": 13, "194808": 13, "106": [13, 23], "293236": 13, "linear_19": 13, "193777": 13, "634748": 13, "401705": 13, "linear_20": 13, "259933": 13, "606617": 13, "722160": 13, "linear_21": 13, "186600": 13, "790260": 13, "512129": 13, "linear_22": 13, "759041": 13, "265832": 13, "050053": 13, "linear_23": 13, "931209": 13, "099090": 13, "979767": 13, "linear_24": 13, "324160": 13, "215561": 13, "321835": 13, "linear_25": 13, "800708": 13, "599352": 13, "284134": 13, "linear_26": 13, "492444": 13, "153369": 13, "274391": 13, "linear_27": 13, "660161": 13, "720994": 13, "46": [13, 18, 23], "674126": 13, "linear_28": 13, "415265": 13, "174434": 13, "007133": 13, "linear_29": 13, "038418": 13, "118534": 13, "724262": 13, "linear_30": 13, "072084": 13, "936867": 13, "259155": 13, "linear_31": 13, "342712": 13, "599489": 13, "282787": 13, "linear_32": 13, "340535": 13, "120308": 13, "701103": 13, "linear_33": 13, "846987": 13, "630030": 13, "985939": 13, "linear_34": 13, "686298": 13, "204571": 13, "607586": 13, "linear_35": 13, "904821": 13, "575518": 13, "756420": 13, "linear_36": 13, "806659": 13, "585589": 13, "118401": 13, "linear_37": 13, "402340": 13, "047157": 13, "162680": 13, "linear_38": 13, "174589": 13, "923361": 13, "030258": 13, "linear_39": 13, "178576": 13, "556058": 13, "807705": 13, "linear_40": 13, "901954": 13, "301267": 13, "956539": 13, "linear_41": 13, "839805": 13, "597429": 13, "716181": 13, "linear_42": 13, "178945": 13, "651595": 13, "895699": 13, "829245": 13, "627592": 13, "637907": 13, "746186": 13, "255032": 13, "167313": 13, "000000": 13, "999756": 13, "031013": 13, "345k": 13, "17k": 13, "218m": 13, "218": 13, "larger": [13, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "counterpart": 13, "bit": [13, 18, 20, 21, 23, 27, 34], "4532": 13, "onnx_pretrain": 14, "onnxruntim": 14, "stateless7": 14, "repo_url": 14, "basenam": 14, "pushd": 14, "popd": 14, "pruned_transducer_stateless7_stream": [14, 40], "len": [14, 40], "tree": [15, 16, 18, 20, 21, 23, 27, 31, 32, 34, 38], "cpu_jit": [15, 18, 23, 26, 28, 29, 39, 40], "confus": 15, "move": [15, 26, 28, 29, 39, 40], "why": 15, "streaming_asr": [15, 16, 38, 39, 40], "conv_emform": 15, "offline_asr": [15, 26], "jit_pretrain": [16, 28, 29, 38], "baz": 16, "tutori": [18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 38, 39, 40], "learn": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "singl": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "1best": [18, 21, 23, 27, 28, 29, 31, 32], "automag": [18, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "stop": [18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "control": [18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "By": [18, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "musan": [18, 21, 23, 24, 26, 28, 29, 38, 39, 40], "thei": [18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "intal": [18, 21], "initi": [18, 21], "sudo": [18, 21], "apt": [18, 21], "permiss": [18, 21], "commandlin": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "quit": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "often": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "experi": [18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39, 40], "state": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "world": [18, 20, 21, 23, 24, 26, 27, 28, 29, 38, 39, 40], "multi": [18, 20, 21, 23, 24, 26, 28, 29, 36, 38, 39, 40], "machin": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "ddp": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "implement": [18, 20, 21, 23, 24, 26, 28, 29, 36, 38, 39, 40], "present": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "second": [18, 20, 21, 23, 24, 26, 28, 29, 34, 38, 39, 40], "over": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "utter": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "pad": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "oom": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "v100": [18, 20, 21, 23], "nvidia": [18, 20, 21, 23], "due": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "usual": [18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "increas": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "weight": [18, 21, 23, 28, 29, 38], "decai": [18, 21, 23, 28, 29, 38], "warmup": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "get_param": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "realli": [18, 21, 23, 26, 28, 29, 38, 39, 40], "directli": [18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "perturb": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "factor": [18, 20, 21, 23, 24, 26, 28, 29, 38, 39, 40], "3x150": [18, 20, 21], "hour": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "These": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "rate": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "visual": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "logdir": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "labelsmooth": 18, "someth": [18, 20, 21, 23, 26, 28, 29, 34, 38, 39], "tensorflow": [18, 20, 21, 23, 26, 28, 29, 34, 38, 39], "press": [18, 20, 21, 23, 26, 28, 29, 34, 38, 39, 40], "ctrl": [18, 20, 21, 23, 26, 28, 29, 34, 38, 39, 40], "engw8ksktzqs24zbv5dgcg": 18, "22t11": 18, "scan": [18, 20, 21, 23, 26, 34, 38, 39], "116068": 18, "scalar": [18, 20, 21, 23, 26, 34, 38, 39], "listen": [18, 20, 21, 26, 34, 38, 39], "url": [18, 20, 21, 23, 26, 28, 29, 34, 38, 39], "xxxx": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "saw": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "consol": [18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "typic": [18, 20, 21, 23], "avoid": [18, 20, 23], "commonli": [18, 20, 21, 23, 27, 31, 32, 34], "nbest": [18, 23, 29], "lattic": [18, 21, 23, 26, 27, 31, 32, 39, 40], "score": [18, 23, 26, 39, 40], "uniqu": [18, 23, 26, 39, 40], "pkufool": [18, 21, 27], "icefall_asr_aishell_conformer_ctc": 18, "transcrib": [18, 20, 21, 23], "lang_char": [18, 20], "word": [18, 20, 21, 23, 27, 31, 32, 34], "bac009s0764w0121": [18, 20, 21], "bac009s0764w0122": [18, 20, 21], "bac009s0764w0123": [18, 20, 21], "tran": [18, 21, 23, 27, 31, 32], "graph": [18, 21, 23, 26, 27, 31, 32, 39, 40], "id": [18, 21, 23, 27, 31, 32], "conveni": [18, 21, 23, 24], "eo": [18, 21, 23], "easili": [18, 21, 23], "obtain": [18, 20, 21, 23, 27, 31, 32], "soxi": [18, 20, 21, 23, 27, 34], "sampl": [18, 20, 21, 23, 27, 28, 34, 39, 40], "precis": [18, 20, 21, 23, 26, 27, 34, 39, 40], "67263": [18, 20, 21], "cdda": [18, 20, 21, 23, 27, 34], "sector": [18, 20, 21, 23, 27, 34], "135k": [18, 20, 21], "256k": [18, 20, 21, 23], "sign": [18, 20, 21, 23, 34], "integ": [18, 20, 21, 23, 34], "pcm": [18, 20, 21, 23, 34], "65840": [18, 20, 21], "625": [18, 20, 21], "132k": [18, 20, 21], "64000": [18, 20, 21], "300": [18, 20, 21, 23, 24, 26, 39], "128k": [18, 20, 21, 34], "displai": [18, 20, 21, 23], "topologi": [18, 23], "53": [18, 26, 27, 32, 38, 39], "707": [18, 23], "attention_dim": [18, 20, 23], "num_decoder_lay": [18, 23], "vgg_frontend": [18, 20, 23], "use_feat_batchnorm": [18, 23], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 18, "sun": 18, "sep": 18, "33cfe45": 18, "d57a873": 18, "nov": [18, 23], "hw": 18, "kangwei": 18, "icefall_aishell3": 18, "k2_releas": 18, "tokens_fil": 18, "words_fil": [18, 23, 34], "num_path": [18, 23, 26, 39, 40], "ngram_lm_scal": [18, 23], "attention_decoder_scal": [18, 23], "nbest_scal": [18, 23], "sos_id": [18, 23], "eos_id": [18, 23], "num_class": [18, 23, 34], "4336": [18, 20], "242": [18, 23], "131": [18, 23], "134": 18, "269": [18, 31, 32], "275": 18, "241": 18, "293": [18, 23], "704": [18, 31], "369": [18, 23], "\u751a": [18, 20], "\u81f3": [18, 20], "\u51fa": [18, 20], "\u73b0": [18, 20], "\u4ea4": [18, 20], "\u6613": [18, 20], "\u51e0": [18, 20], "\u4e4e": [18, 20], "\u505c": [18, 20], "\u6b62": 18, "\u7684": [18, 20, 21], "\u60c5": [18, 20], "\u51b5": [18, 20], "\u4e00": [18, 20], "\u4e8c": [18, 20], "\u7ebf": [18, 20, 21], "\u57ce": [18, 20], "\u5e02": [18, 20], "\u867d": [18, 20], "\u7136": [18, 20], "\u4e5f": [18, 20, 21], "\u5904": [18, 20], "\u4e8e": [18, 20], "\u8c03": [18, 20], "\u6574": [18, 20], "\u4e2d": [18, 20, 21], "\u4f46": [18, 20, 21], "\u56e0": [18, 20], "\u4e3a": [18, 20], "\u805a": [18, 20], "\u96c6": [18, 20], "\u4e86": [18, 20, 21], "\u8fc7": [18, 20], "\u591a": [18, 20], "\u516c": [18, 20], "\u5171": [18, 20], "\u8d44": [18, 20], "\u6e90": [18, 20], "371": 18, "683": 18, "651": [18, 34], "654": 18, "659": 18, "752": 18, "887": 18, "340": 18, "370": 18, "\u751a\u81f3": [18, 21], "\u51fa\u73b0": [18, 21], "\u4ea4\u6613": [18, 21], "\u51e0\u4e4e": [18, 21], "\u505c\u6b62": 18, "\u60c5\u51b5": [18, 21], "\u4e00\u4e8c": [18, 21], "\u57ce\u5e02": [18, 21], "\u867d\u7136": [18, 21], "\u5904\u4e8e": [18, 21], "\u8c03\u6574": [18, 21], "\u56e0\u4e3a": [18, 21], "\u805a\u96c6": [18, 21], "\u8fc7\u591a": [18, 21], "\u516c\u5171": [18, 21], "\u8d44\u6e90": [18, 21], "372": 18, "recor": [18, 23], "highest": [18, 23], "965": 18, "966": 18, "821": 18, "822": 18, "826": 18, "916": 18, "345": 18, "888": 18, "889": 18, "limit": [18, 20, 23, 36, 39], "upgrad": [18, 23], "pro": [18, 23], "finish": [18, 20, 21, 23, 24, 26, 27, 31, 32, 34, 39, 40], "deploi": [18, 23], "At": [18, 23], "checkout": [18, 23], "v2": [18, 23], "hlg_decod": [18, 23], "four": [18, 23], "messag": [18, 23, 26, 28, 29, 38, 39, 40], "nn_model": [18, 23], "use_gpu": [18, 23], "word_tabl": [18, 23], "caution": [18, 23], "forward": [18, 23, 28], "cu": [18, 23], "int": [18, 23], "char": [18, 23], "124": [18, 23], "98": 18, "150": [18, 23], "693": [18, 31], "165": [18, 23], "nnet_output": [18, 23], "489": 18, "mandarin": 19, "corpu": 19, "beij": 19, "shell": 19, "technologi": 19, "ltd": 19, "400": 19, "peopl": 19, "accent": 19, "area": 19, "china": 19, "invit": 19, "particip": 19, "conduct": 19, "quiet": 19, "indoor": 19, "high": 19, "fidel": 19, "microphon": 19, "downsampl": 19, "16khz": 19, "manual": 19, "through": 19, "profession": 19, "annot": 19, "strict": 19, "inspect": 19, "free": [19, 24, 38], "academ": 19, "moder": 19, "amount": 19, "research": 19, "field": 19, "openslr": 19, "ctc": [19, 22, 25, 29, 30, 33], "stateless": [19, 22, 26, 38, 39, 40], "instead": [20, 39], "head": [20, 36], "feedforward": [20, 26, 39], "embed": [20, 26, 38, 39, 40], "conv1d": [20, 26, 38, 39, 40], "nn": [20, 26, 28, 29, 38, 39, 40], "tanh": 20, "borrow": 20, "ieeexplor": 20, "ieee": 20, "stamp": 20, "jsp": 20, "arnumb": 20, "9054419": 20, "predict": [20, 24, 26, 38, 39, 40], "charact": 20, "unit": 20, "vocabulari": 20, "87939824": 20, "optimized_transduc": 20, "technqiu": 20, "propos": [20, 36, 40], "improv": 20, "end": [20, 26, 28, 29, 34, 38, 39, 40], "furthermor": 20, "maximum": 20, "emit": 20, "per": [20, 26, 39, 40], "frame": [20, 26, 28, 39, 40], "simplifi": [20, 36], "significantli": 20, "degrad": 20, "exactli": 20, "benchmark": 20, "unprun": 20, "advantag": 20, "minim": 20, "pruned_transducer_stateless": [20, 26, 36, 39], "altern": 20, "though": 20, "transducer_stateless_modifi": 20, "pr": 20, "gb": 20, "ram": 20, "small": [20, 31, 32, 34], "tri": 20, "prob": [20, 38], "appli": [20, 36], "configur": [20, 24, 27, 31, 32, 34], "c": [20, 21, 26, 28, 29, 34, 38, 39, 40], "lagz6hrcqxoigbfd5e0y3q": 20, "03t14": 20, "8477": 20, "sym": [20, 26, 39, 40], "beam_search": [20, 26, 39, 40], "decoding_method": 20, "beam_4": 20, "ensur": 20, "give": 20, "poor": 20, "531": [20, 21], "994": [20, 23], "027": 20, "encoder_out_dim": 20, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 20, "50d2281": 20, "mar": 20, "0815224919": 20, "75d558775b": 20, "mmnv8": 20, "72": [20, 23], "878": [20, 32], "257": [20, 31, 32], "880": 20, "891": 20, "__floordiv__": 20, "x_len": 20, "163": [20, 23], "\u6ede": 20, "322": 20, "759": 20, "760": 20, "919": 20, "922": 20, "046": 20, "047": 20, "319": [20, 23], "214": [20, 23], "215": [20, 23, 27], "402": 20, "topk_hyp_index": 20, "topk_index": 20, "logit": 20, "583": [20, 32], "2000": 21, "lji9mwuorlow3jkdhxwk8a": 21, "13t11": 21, "4454": 21, "icefall_asr_aishell_tdnn_lstm_ctc": 21, "858": [21, 23], "389": [21, 23], "154": 21, "161": [21, 23], "536": 21, "171": [21, 23, 31, 32], "539": 21, "917": 21, "129": 21, "\u505c\u6ede": 21, "statelessx": [22, 24, 25, 35, 36, 37], "mmi": [22, 25], "blank": [22, 25], "skip": [22, 24, 25, 26, 38, 39, 40], "distil": [22, 25], "hubert": [22, 25], "ligru": [22, 30], "full": [23, 24, 26, 28, 29, 38, 39, 40], "libri": [23, 24, 26, 28, 29, 38, 39, 40], "960": [23, 26, 28, 29, 38, 39, 40], "subset": [23, 26, 28, 29, 38, 39, 40], "3x960": [23, 26, 28, 29, 38, 39, 40], "2880": [23, 26, 28, 29, 38, 39, 40], "lzgnetjwrxc3yghnmd4kpw": 23, "24t16": 23, "4540": 23, "sentenc": 23, "piec": 23, "And": [23, 26, 28, 29, 38, 39, 40], "neither": 23, "nor": 23, "vocab": 23, "5000": 23, "033": 23, "538": 23, "full_libri": [23, 24], "406": 23, "464": 23, "548": 23, "776": 23, "652": [23, 34], "109226120": 23, "714": [23, 31], "473": 23, "944": 23, "1328": 23, "443": [23, 27], "2563": 23, "494": 23, "592": 23, "1715": 23, "52576": 23, "1424": 23, "807": 23, "506": 23, "808": [23, 31], "522": 23, "362": 23, "565": 23, "1477": 23, "2922": 23, "208": 23, "4295": 23, "52343": 23, "396": 23, "3584": 23, "433": 23, "680": [23, 31], "_pickl": 23, "unpicklingerror": 23, "hlg_modifi": 23, "g_4_gram": [23, 27, 31, 32], "875": [23, 27], "212k": 23, "267440": [23, 27], "1253": [23, 27], "535k": 23, "83": [23, 27], "77200": [23, 27], "361": [23, 27], "154k": 23, "554": 23, "260": 23, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 23, "8d93169": 23, "601": 23, "758": 23, "025": 23, "204": 23, "broffel": 23, "osom": 23, "723": 23, "775": 23, "881": 23, "352": 23, "234": 23, "384": 23, "whole": [23, 27, 31, 32, 39, 40], "ngram": [23, 27, 31, 32], "857": 23, "979": 23, "980": 23, "055": 23, "117": 23, "051": 23, "363": 23, "959": [23, 32], "546": 23, "598": 23, "599": [23, 27], "833": 23, "834": 23, "915": 23, "076": 23, "110": 23, "397": 23, "999": [23, 26, 39, 40], "concaten": 23, "bucket": 23, "sampler": 23, "1000": 23, "ctc_decod": 23, "ngram_lm_rescor": 23, "attention_rescor": 23, "kind": [23, 26, 28, 29, 38, 39, 40], "316": 23, "118": 23, "221": 23, "125": [23, 34], "136": 23, "144": 23, "543": 23, "174": 23, "topo": 23, "547": 23, "729": 23, "111": 23, "702": 23, "703": 23, "545": 23, "122": 23, "280": 23, "135": [23, 34], "153": [23, 34], "945": 23, "475": 23, "191": [23, 31, 32], "398": 23, "199": [23, 27], "515": 23, "205": 23, "w": [23, 31, 32], "deseri": 23, "441": 23, "fsaclass": 23, "loadfsa": 23, "const": 23, "string": 23, "c10": 23, "ignor": 23, "dummi": 23, "589": 23, "attention_scal": 23, "656": 23, "162": 23, "169": [23, 31, 32], "188": 23, "624": 23, "519": [23, 32], "632": 23, "645": [23, 34], "243": 23, "970": 23, "303": 23, "179": 23, "knowledg": 24, "_": 24, "vector": 24, "mvq": 24, "kd": 24, "paper": [24, 26, 38, 39, 40], "pruned_transducer_stateless4": [24, 26, 36, 39], "theoret": 24, "applic": 24, "minor": 24, "out": 24, "necessari": 24, "thing": 24, "distillation_with_hubert": 24, "Of": 24, "cours": 24, "xl": 24, "know": 24, "proce": 24, "960h": [24, 28], "use_extracted_codebook": 24, "augment": 24, "th": [24, 31, 32], "fine": 24, "embedding_lay": 24, "num_codebook": 24, "under": 24, "vq_fbank_layer36_cb8": 24, "whola": 24, "snippet": 24, "echo": 24, "awk": 24, "split": 24, "pruned_transducer_stateless6": 24, "12359": 24, "spec": 24, "aug": 24, "warp": 24, "enabl": 24, "argument": [24, 36], "paid": 24, "similar": [24, 28, 39, 40], "suitabl": [26, 38, 39, 40], "pruned_transducer_stateless2": [26, 36, 39], "pruned_transducer_stateless5": [26, 36, 39], "scroll": [26, 28, 29, 38, 39, 40], "scratch": [26, 28, 29, 38, 39, 40], "arxiv": [26, 38, 39, 40], "ab": [26, 38, 39, 40], "2206": [26, 38, 39, 40], "13236": [26, 38, 39, 40], "rework": [26, 36, 39], "daniel": [26, 39, 40], "joint": [26, 38, 39, 40], "contrari": [26, 38, 39, 40], "convent": [26, 38, 39, 40], "recurr": [26, 38, 39, 40], "2x": [26, 39, 40], "dimens": [26, 39, 40], "littl": [26, 39], "allow": [26, 39], "436000": [26, 28, 29, 38, 39, 40], "438000": [26, 28, 29, 38, 39, 40], "qogspbgsr8kzcrmmie9jgw": 26, "20t15": [26, 38, 39], "4468": [26, 38, 39], "210171": [26, 38, 39], "access": [26, 28, 29, 38, 39, 40], "6008": [26, 28, 29, 38, 39, 40], "localhost": [26, 28, 29, 38, 39, 40], "expos": [26, 28, 29, 38, 39, 40], "proxi": [26, 28, 29, 38, 39, 40], "bind_al": [26, 28, 29, 38, 39, 40], "suggest": [26, 28, 29, 38, 39, 40], "both": [26, 28, 29, 36, 38, 39, 40], "lowest": [26, 28, 29, 38, 39, 40], "fast_beam_search": [26, 28, 38, 39, 40], "474000": [26, 38, 39, 40], "largest": [26, 39, 40], "posterior": [26, 28, 39, 40], "algorithm": [26, 39, 40], "pdf": [26, 29, 39, 40], "1211": [26, 39, 40], "3711": [26, 39, 40], "espnet": [26, 39, 40], "net": [26, 39, 40], "beam_search_transduc": [26, 39, 40], "basicli": [26, 39, 40], "topk": [26, 39, 40], "expand": [26, 39, 40], "mode": [26, 39, 40], "being": [26, 39, 40], "hardcod": [26, 39, 40], "composit": [26, 39, 40], "between": [26, 39, 40], "log_prob": [26, 39, 40], "hard": [26, 36, 39, 40], "2211": [26, 39, 40], "00484": [26, 39, 40], "rnnt": [26, 39, 40], "effici": [26, 39, 40], "fast_beam_search_lg": [26, 39, 40], "trivial": [26, 39, 40], "fast_beam_search_nbest": [26, 39, 40], "random_path": [26, 39, 40], "shortest": [26, 39, 40], "fast_beam_search_nbest_lg": [26, 39, 40], "logic": [26, 39, 40], "smallest": [26, 38, 39, 40], "icefall_asr_librispeech_tdnn": 27, "lstm_ctc": 27, "flac": 27, "116k": 27, "140k": 27, "343k": 27, "164k": 27, "105k": 27, "174k": 27, "pretraind": 27, "168": 27, "170": 27, "584": [27, 32], "209": 27, "791": 27, "245": 27, "099": 27, "methond": [27, 31, 32], "725": 27, "403": 27, "631": 27, "010": 27, "guidanc": 28, "bigger": 28, "simpli": 28, "discard": 28, "prevent": 28, "lconv": 28, "encourag": [28, 29, 38], "stabil": [28, 29], "doesn": 28, "warm": [28, 29], "xyozukpeqm62hbilud4upa": [28, 29], "ctc_guide_decode_b": 28, "pretrained_ctc": 28, "jit_pretrained_ctc": 28, "100h": 28, "yfyeung": 28, "wechat": 29, "zipformer_mmi": 29, "worker": [29, 38], "hp": 29, "tdnn_ligru_ctc": 31, "enough": [31, 32, 34], "luomingshuang": [31, 32], "icefall_asr_timit_tdnn_ligru_ctc": 31, "pretrained_average_9_25": 31, "fdhc0_si1559": [31, 32], "felc0_si756": [31, 32], "fmgd0_si1564": [31, 32], "ffprobe": [31, 32], "show_format": [31, 32], "nistspher": [31, 32], "database_id": [31, 32], "database_vers": [31, 32], "utterance_id": [31, 32], "dhc0_si1559": [31, 32], "sample_min": [31, 32], "4176": [31, 32], "sample_max": [31, 32], "5984": [31, 32], "bitrat": [31, 32], "258": [31, 32], "audio": [31, 32], "pcm_s16le": [31, 32], "s16": [31, 32], "256": [31, 32], "elc0_si756": [31, 32], "1546": [31, 32], "1989": [31, 32], "mgd0_si1564": [31, 32], "7626": [31, 32], "10573": [31, 32], "660": 31, "695": 31, "697": 31, "210": [31, 32], "829": 31, "sil": [31, 32], "dh": [31, 32], "ih": [31, 32], "uw": [31, 32], "ah": [31, 32], "ii": [31, 32], "z": [31, 32], "aa": [31, 32], "ei": [31, 32], "dx": [31, 32], "uh": [31, 32], "ng": [31, 32], "eh": [31, 32], "jh": [31, 32], "er": [31, 32], "ai": [31, 32], "hh": [31, 32], "aw": 31, "ae": [31, 32], "705": 31, "715": 31, "720": 31, "251": [31, 32], "348": 31, "ch": 31, "icefall_asr_timit_tdnn_lstm_ctc": 32, "pretrained_average_16_25": 32, "816": 32, "827": 32, "387": 32, "unk": 32, "739": 32, "971": 32, "977": 32, "978": 32, "981": 32, "ow": 32, "ykubhb5wrmosxykid1z9eg": 34, "23t23": 34, "icefall_asr_yesno_tdnn": 34, "l_disambig": 34, "lexicon_disambig": 34, "arpa": 34, "0_0_0_1_0_0_0_1": 34, "0_0_1_0_0_0_1_0": 34, "0_0_1_0_0_1_1_1": 34, "0_0_1_0_1_0_0_1": 34, "0_0_1_1_0_0_0_1": 34, "0_0_1_1_0_1_1_0": 34, "0_0_1_1_1_0_0_0": 34, "0_0_1_1_1_1_0_0": 34, "0_1_0_0_0_1_0_0": 34, "0_1_0_0_1_0_1_0": 34, "0_1_0_1_0_0_0_0": 34, "0_1_0_1_1_1_0_0": 34, "0_1_1_0_0_1_1_1": 34, "0_1_1_1_0_0_1_0": 34, "0_1_1_1_1_0_1_0": 34, "1_0_0_0_0_0_0_0": 34, "1_0_0_0_0_0_1_1": 34, "1_0_0_1_0_1_1_1": 34, "1_0_1_1_0_1_1_1": 34, "1_0_1_1_1_1_0_1": 34, "1_1_0_0_0_1_1_1": 34, "1_1_0_0_1_0_1_1": 34, "1_1_0_1_0_1_0_0": 34, "1_1_0_1_1_0_0_1": 34, "1_1_0_1_1_1_1_0": 34, "1_1_1_0_0_1_0_1": 34, "1_1_1_0_1_0_1_0": 34, "1_1_1_1_0_0_1_0": 34, "1_1_1_1_1_0_0_0": 34, "1_1_1_1_1_1_1_1": 34, "54080": 34, "507": 34, "108k": 34, "ye": 34, "hebrew": 34, "NO": 34, "621": 34, "119": 34, "650": 34, "139": 34, "143": 34, "198": 34, "181": 34, "186": 34, "187": 34, "correctli": 34, "simplest": 34, "former": 36, "idea": 36, "achiev": 36, "mask": [36, 39, 40], "wenet": 36, "did": 36, "metion": 36, "complic": 36, "techniqu": 36, "bank": 36, "memor": 36, "histori": 36, "introduc": 36, "variant": 36, "pruned_stateless_emformer_rnnt2": 36, "conv_emformer_transducer_stateless": 36, "ourself": 36, "mechan": 36, "onlin": 38, "lstm_transducer_stateless": 38, "lower": 38, "prepare_giga_speech": 38, "cj2vtpiwqhkn9q1tx6ptpg": 38, "dynam": [39, 40], "causal": 39, "short": [39, 40], "2012": 39, "05481": 39, "flag": 39, "indic": [39, 40], "whether": 39, "sequenc": [39, 40], "uniformli": [39, 40], "seen": [39, 40], "97vkxf80ru61cnp2alwzzg": 39, "streaming_decod": [39, 40], "acoust": [39, 40], "wise": [39, 40], "parallel": [39, 40], "bath": [39, 40], "parallelli": [39, 40], "seem": 39, "benefit": 39, "mismatch": 39, "mdoel": 39, "320m": 40, "550": 40, "scriptmodul": 40, "jit_trace_export": 40, "jit_trace_pretrain": 40, "task": 41}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 10, 15, 16], "creat": [2, 9], "recip": [2, 41], "data": [2, 9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "prepar": [2, 9, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "train": [2, 6, 9, 12, 13, 14, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "decod": [2, 9, 10, 14, 18, 20, 21, 23, 24, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "pre": [2, 6, 12, 13, 14, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "model": [2, 6, 10, 12, 13, 14, 15, 16, 17, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "frequent": 4, "ask": 4, "question": 4, "faq": 4, "oserror": 4, "libtorch_hip": 4, "so": 4, "cannot": 4, "open": 4, "share": 4, "object": 4, "file": [4, 14], "directori": 4, "attributeerror": 4, "modul": 4, "distutil": 4, "ha": 4, "attribut": 4, "version": 4, "importerror": 4, "libpython3": 4, "10": 4, "1": [4, 9, 12, 13, 18, 20, 21, 23], "0": [4, 9], "No": 4, "huggingfac": [5, 7], "space": 7, "youtub": [7, 9], "video": [7, 9], "icefal": [8, 9, 12, 13], "content": [8, 41], "instal": [9, 12, 13, 18, 20, 21, 23, 27, 31, 32], "pytorch": 9, "torchaudio": 9, "k2": 9, "2": [9, 12, 13, 18, 20, 21, 23], "lhots": 9, "3": [9, 12, 13, 18, 20, 23], "download": [9, 12, 13, 14, 18, 20, 21, 23, 26, 27, 28, 29, 31, 32, 34, 38, 39, 40], "exampl": [9, 14, 18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "virtual": 9, "environ": 9, "activ": 9, "your": 9, "4": [9, 12, 13], "5": [9, 12, 13], "test": [9, 12, 13], "export": [10, 11, 12, 13, 14, 15, 16, 17, 26, 28, 29, 38, 39, 40], "state_dict": [10, 26, 28, 29, 38, 39, 40], "when": [10, 15, 16], "us": [10, 15, 16, 26, 28, 29, 38, 39, 40], "run": 10, "py": 10, "ncnn": [11, 12, 13], "convemform": 12, "transduc": [12, 13, 20, 26, 38, 39, 40], "pnnx": [12, 13], "via": [12, 13], "torch": [12, 13, 15, 16, 26, 28, 29, 38, 39, 40], "jit": [12, 13, 15, 16, 26, 28, 29, 38, 39, 40], "trace": [12, 13, 16, 38, 40], "torchscript": [12, 13], "6": [12, 13], "modifi": [12, 13, 20], "encod": [12, 13], "sherpa": [12, 13, 26, 39, 40], "7": [12, 13], "option": [12, 13, 18, 21, 23, 26, 28, 29, 38, 39, 40], "int8": [12, 13], "quantiz": [12, 13], "lstm": [13, 21, 27, 32, 38], "onnx": 14, "sound": 14, "script": [15, 26, 28, 29, 39, 40], "conform": [18, 23, 36], "ctc": [18, 21, 23, 27, 28, 31, 32, 34], "configur": [18, 21, 23, 26, 28, 29, 38, 39, 40], "log": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "usag": [18, 20, 21, 23, 26, 28, 29, 38, 39, 40], "case": [18, 20, 21, 23], "kaldifeat": [18, 20, 21, 23, 27, 31, 32, 34], "hlg": [18, 21, 23], "attent": [18, 23], "rescor": [18, 23], "colab": [18, 20, 21, 23, 27, 31, 32, 34], "notebook": [18, 20, 21, 23, 27, 31, 32, 34], "deploy": [18, 23], "c": [18, 23], "aishel": 19, "stateless": 20, "The": 20, "loss": 20, "todo": 20, "greedi": 20, "search": 20, "beam": 20, "tdnn": [21, 27, 31, 32, 34], "non": 22, "stream": [22, 35, 36, 39, 40], "asr": [22, 35], "lm": 23, "comput": 23, "wer": 23, "n": 23, "gram": 23, "distil": 24, "hubert": 24, "codebook": 24, "index": 24, "librispeech": [25, 37], "prune": [26, 39], "statelessx": [26, 39], "pretrain": [26, 28, 29, 38, 39, 40], "deploi": [26, 39, 40], "infer": [27, 31, 32, 34], "zipform": [28, 29, 40], "blank": 28, "skip": 28, "mmi": 29, "timit": 30, "ligru": 31, "yesno": 33, "introduct": 36, "emform": 36, "which": 38, "simul": [39, 40], "real": [39, 40], "tabl": 41}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [20, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [18, "training"], [20, "training"], [21, "training"], [23, "training"], [24, "training"], [26, "training"], [27, "training"], [28, "training"], [29, "training"], [31, "training"], [32, "training"], [34, "training"], [38, "training"], [39, "training"], [40, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [18, "decoding"], [20, "decoding"], [21, "decoding"], [23, "decoding"], [24, "decoding"], [26, "decoding"], [27, "decoding"], [28, "decoding"], [29, "decoding"], [31, "decoding"], [32, "decoding"], [34, "decoding"], [38, "decoding"], [39, "decoding"], [40, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "Frequently Asked Questions (FAQs)": [[4, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[4, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[4, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[4, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "Huggingface": [[5, "huggingface"]], "Pre-trained models": [[6, "pre-trained-models"]], "Huggingface spaces": [[7, "huggingface-spaces"]], "YouTube Video": [[7, "youtube-video"], [9, "youtube-video"]], "Icefall": [[8, "icefall"]], "Contents:": [[8, null]], "Installation": [[9, "installation"]], "(0) Install PyTorch and torchaudio": [[9, "install-pytorch-and-torchaudio"]], "(1) Install k2": [[9, "install-k2"]], "(2) Install lhotse": [[9, "install-lhotse"]], "(3) Download icefall": [[9, "download-icefall"]], "Installation example": [[9, "installation-example"]], "(1) Create a virtual environment": [[9, "create-a-virtual-environment"]], "(2) Activate your virtual environment": [[9, "activate-your-virtual-environment"]], "(3) Install k2": [[9, "id1"]], "(4) Install lhotse": [[9, "id2"]], "(5) Download icefall": [[9, "id3"]], "Test Your Installation": [[9, "test-your-installation"]], "Data preparation": [[9, "data-preparation"], [18, "data-preparation"], [21, "data-preparation"], [23, "data-preparation"], [24, "data-preparation"], [26, "data-preparation"], [27, "data-preparation"], [28, "data-preparation"], [29, "data-preparation"], [31, "data-preparation"], [32, "data-preparation"], [34, "data-preparation"], [38, "data-preparation"], [39, "data-preparation"], [40, "data-preparation"]], "Export model.state_dict()": [[10, "export-model-state-dict"], [26, "export-model-state-dict"], [28, "export-model-state-dict"], [29, "export-model-state-dict"], [38, "export-model-state-dict"], [39, "export-model-state-dict"], [40, "export-model-state-dict"]], "When to use it": [[10, "when-to-use-it"], [15, "when-to-use-it"], [16, "when-to-use-it"]], "How to export": [[10, "how-to-export"], [15, "how-to-export"], [16, "how-to-export"]], "How to use the exported model": [[10, "how-to-use-the-exported-model"], [15, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[10, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[11, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[12, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[12, "download-the-pre-trained-model"], [13, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[12, "install-ncnn-and-pnnx"], [13, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[12, "export-the-model-via-torch-jit-trace"], [13, "export-the-model-via-torch-jit-trace"]], "4. Export torchscript model via pnnx": [[12, "export-torchscript-model-via-pnnx"], [13, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[12, "test-the-exported-models-in-icefall"], [13, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[12, "modify-the-exported-encoder-for-sherpa-ncnn"], [13, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[12, "optional-int8-quantization-with-sherpa-ncnn"], [13, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[13, "export-lstm-transducer-models-to-ncnn"]], "Export to ONNX": [[14, "export-to-onnx"]], "Example": [[14, "example"]], "Download the pre-trained model": [[14, "download-the-pre-trained-model"], [18, "download-the-pre-trained-model"], [20, "download-the-pre-trained-model"], [21, "download-the-pre-trained-model"], [23, "download-the-pre-trained-model"], [27, "download-the-pre-trained-model"], [31, "download-the-pre-trained-model"], [32, "download-the-pre-trained-model"], [34, "download-the-pre-trained-model"]], "Export the model to ONNX": [[14, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[14, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[15, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[16, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[16, "how-to-use-the-exported-models"]], "Model export": [[17, "model-export"]], "Conformer CTC": [[18, "conformer-ctc"], [23, "conformer-ctc"]], "Configurable options": [[18, "configurable-options"], [21, "configurable-options"], [23, "configurable-options"], [26, "configurable-options"], [28, "configurable-options"], [29, "configurable-options"], [38, "configurable-options"], [39, "configurable-options"], [40, "configurable-options"]], "Pre-configured options": [[18, "pre-configured-options"], [21, "pre-configured-options"], [23, "pre-configured-options"], [26, "pre-configured-options"], [28, "pre-configured-options"], [29, "pre-configured-options"], [38, "pre-configured-options"], [39, "pre-configured-options"], [40, "pre-configured-options"]], "Training logs": [[18, "training-logs"], [20, "training-logs"], [21, "training-logs"], [23, "training-logs"], [26, "training-logs"], [28, "training-logs"], [29, "training-logs"], [38, "training-logs"], [39, "training-logs"], [40, "training-logs"]], "Usage examples": [[18, "usage-examples"], [20, "usage-examples"], [21, "usage-examples"], [23, "usage-examples"]], "Case 1": [[18, "case-1"], [20, "case-1"], [21, "case-1"], [23, "case-1"]], "Case 2": [[18, "case-2"], [20, "case-2"], [21, "case-2"], [23, "case-2"]], "Case 3": [[18, "case-3"], [20, "case-3"], [23, "case-3"]], "Pre-trained Model": [[18, "pre-trained-model"], [20, "pre-trained-model"], [21, "pre-trained-model"], [23, "pre-trained-model"], [27, "pre-trained-model"], [31, "pre-trained-model"], [32, "pre-trained-model"], [34, "pre-trained-model"]], "Install kaldifeat": [[18, "install-kaldifeat"], [20, "install-kaldifeat"], [21, "install-kaldifeat"], [23, "install-kaldifeat"], [27, "install-kaldifeat"], [31, "install-kaldifeat"], [32, "install-kaldifeat"]], "Usage": [[18, "usage"], [20, "usage"], [21, "usage"], [23, "usage"]], "CTC decoding": [[18, "ctc-decoding"], [23, "ctc-decoding"], [23, "id2"]], "HLG decoding": [[18, "hlg-decoding"], [18, "id2"], [21, "hlg-decoding"], [23, "hlg-decoding"], [23, "id3"]], "HLG decoding + attention decoder rescoring": [[18, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[18, "colab-notebook"], [20, "colab-notebook"], [21, "colab-notebook"], [23, "colab-notebook"], [27, "colab-notebook"], [31, "colab-notebook"], [32, "colab-notebook"], [34, "colab-notebook"]], "Deployment with C++": [[18, "deployment-with-c"], [23, "deployment-with-c"]], "aishell": [[19, "aishell"]], "Stateless Transducer": [[20, "stateless-transducer"]], "The Model": [[20, "the-model"]], "The Loss": [[20, "the-loss"]], "Todo": [[20, "id1"]], "Greedy search": [[20, "greedy-search"]], "Beam search": [[20, "beam-search"]], "Modified Beam search": [[20, "modified-beam-search"]], "TDNN-LSTM CTC": [[21, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[22, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[23, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[23, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[23, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[23, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[23, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[24, "distillation-with-hubert"]], "Codebook index preparation": [[24, "codebook-index-preparation"]], "LibriSpeech": [[25, "librispeech"], [37, "librispeech"]], "Pruned transducer statelessX": [[26, "pruned-transducer-statelessx"], [39, "pruned-transducer-statelessx"]], "Usage example": [[26, "usage-example"], [28, "usage-example"], [29, "usage-example"], [38, "usage-example"], [39, "usage-example"], [40, "usage-example"]], "Export Model": [[26, "export-model"], [39, "export-model"], [40, "export-model"]], "Export model using torch.jit.script()": [[26, "export-model-using-torch-jit-script"], [28, "export-model-using-torch-jit-script"], [29, "export-model-using-torch-jit-script"], [39, "export-model-using-torch-jit-script"], [40, "export-model-using-torch-jit-script"]], "Download pretrained models": [[26, "download-pretrained-models"], [28, "download-pretrained-models"], [29, "download-pretrained-models"], [38, "download-pretrained-models"], [39, "download-pretrained-models"], [40, "download-pretrained-models"]], "Deploy with Sherpa": [[26, "deploy-with-sherpa"], [39, "deploy-with-sherpa"], [40, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[27, "tdnn-lstm-ctc"], [32, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[27, "inference-with-a-pre-trained-model"], [31, "inference-with-a-pre-trained-model"], [32, "inference-with-a-pre-trained-model"], [34, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[28, "zipformer-ctc-blank-skip"]], "Export models": [[28, "export-models"], [29, "export-models"], [38, "export-models"]], "Zipformer MMI": [[29, "zipformer-mmi"]], "TIMIT": [[30, "timit"]], "TDNN-LiGRU-CTC": [[31, "tdnn-ligru-ctc"]], "YesNo": [[33, "yesno"]], "TDNN-CTC": [[34, "tdnn-ctc"]], "Download kaldifeat": [[34, "download-kaldifeat"]], "Streaming ASR": [[35, "streaming-asr"]], "Introduction": [[36, "introduction"]], "Streaming Conformer": [[36, "streaming-conformer"]], "Streaming Emformer": [[36, "streaming-emformer"]], "LSTM Transducer": [[38, "lstm-transducer"]], "Which model to use": [[38, "which-model-to-use"]], "Export model using torch.jit.trace()": [[38, "export-model-using-torch-jit-trace"], [40, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[39, "simulate-streaming-decoding"], [40, "simulate-streaming-decoding"]], "Real streaming decoding": [[39, "real-streaming-decoding"], [40, "real-streaming-decoding"]], "Zipformer Transducer": [[40, "zipformer-transducer"]], "Recipes": [[41, "recipes"]], "Table of Contents": [[41, null]]}, "indexentries": {}})