Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "faqs", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "faqs.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Frequently Asked Questions (FAQs)", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41, 42], "us": [0, 1, 2, 4, 5, 7, 8, 9, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 24, 25, 28, 32, 33, 35, 37], "tool": [0, 4, 12], "make": [0, 1, 3, 12, 13, 14, 19, 21, 24, 37], "consist": [0, 21, 27, 39, 40, 41], "possibl": [0, 2, 3, 9, 19, 24], "black": 0, "format": [0, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "flake8": 0, "check": [0, 24], "qualiti": [0, 20], "isort": 0, "sort": [0, 9], "import": [0, 4, 12, 40, 41], "The": [0, 1, 2, 4, 7, 9, 10, 12, 13, 14, 19, 20, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "version": [0, 8, 9, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 32, 33, 40], "abov": [0, 4, 9, 10, 12, 13, 14, 15, 19, 20, 21, 22, 24, 27, 29, 30, 35, 37, 39, 40, 41], "ar": [0, 1, 3, 4, 9, 10, 12, 13, 14, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41, 42], "22": [0, 12, 13, 24, 32, 33, 35], "3": [0, 4, 8, 10, 11, 15, 18, 22, 25, 27, 28, 29, 30, 35, 39, 40, 41], "0": [0, 1, 8, 10, 12, 13, 14, 15, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "5": [0, 11, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "4": [0, 4, 8, 10, 11, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "10": [0, 8, 9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "1": [0, 8, 10, 11, 15, 16, 17, 18, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "after": [0, 1, 7, 9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "run": [0, 2, 4, 7, 9, 12, 13, 14, 15, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "command": [0, 1, 4, 9, 10, 12, 13, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "git": [0, 9, 10, 12, 13, 14, 15, 19, 21, 22, 24, 28, 32, 33, 35], "clone": [0, 9, 10, 12, 13, 14, 15, 19, 21, 22, 24, 28, 32, 33, 35], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "github": [0, 2, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "com": [0, 2, 6, 7, 9, 10, 12, 13, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "k2": [0, 2, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 39, 40, 41], "fsa": [0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 24, 27, 29, 30, 39, 40, 41], "icefal": [0, 2, 3, 4, 6, 7, 10, 11, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41, 42], "cd": [0, 1, 2, 4, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "pip": [0, 1, 4, 9, 12, 15, 21], "instal": [0, 1, 4, 5, 7, 8, 10, 11, 15, 18, 25, 27, 29, 30, 35, 39, 40, 41], "pre": [0, 3, 5, 7, 8, 9, 11, 18, 25], "commit": 0, "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "automat": [0, 7, 25], "hook": 0, "invok": 0, "fail": [0, 9], "If": [0, 2, 4, 7, 12, 13, 14, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "ani": [0, 9, 19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40], "your": [0, 1, 2, 5, 7, 8, 12, 13, 14, 15, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "wa": [0, 9, 10, 24, 28], "success": [0, 9, 12, 13], "pleas": [0, 1, 2, 4, 7, 9, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "fix": [0, 4, 9, 12, 13, 14, 24], "issu": [0, 4, 9, 12, 13, 24, 25, 40, 41], "report": [0, 4, 9, 25], "some": [0, 1, 10, 12, 13, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "i": [0, 1, 2, 4, 7, 9, 10, 11, 12, 13, 14, 15, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "e": [0, 2, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "modifi": [0, 11, 18, 19, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "file": [0, 2, 7, 8, 10, 12, 13, 14, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "place": [0, 9, 10, 21, 24, 28], "so": [0, 7, 8, 9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "statu": 0, "failur": 0, "see": [0, 1, 7, 9, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "which": [0, 2, 7, 10, 12, 13, 14, 15, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 40, 41], "ha": [0, 2, 8, 11, 12, 13, 14, 15, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 37, 39, 40, 41], "been": [0, 11, 12, 13, 14, 21], "befor": [0, 1, 10, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "further": 0, "chang": [0, 4, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "all": [0, 6, 7, 10, 12, 13, 14, 16, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "again": [0, 12, 13, 35], "should": [0, 2, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "succe": 0, "thi": [0, 2, 3, 4, 5, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41, 42], "time": [0, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "succeed": 0, "want": [0, 9, 10, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "can": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "do": [0, 2, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "Or": 0, "without": [0, 5, 7, 19, 24], "your_changed_fil": 0, "py": [0, 2, 4, 9, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 6, 7, 9, 10, 12, 13, 14, 15, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "prepar": [1, 3, 10], "environ": [1, 4, 12, 13, 14, 19, 20, 21, 22, 24, 25, 27, 28, 32, 33, 35, 40, 41], "doc": [1, 10, 37], "r": [1, 9, 12, 13, 14, 32, 33], "requir": [1, 9, 25, 40, 41], "txt": [1, 9, 12, 13, 14, 15, 19, 21, 22, 24, 28, 32, 33, 35], "set": [1, 4, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40, 41], "up": [1, 9, 10, 12, 13, 14, 19, 22, 24, 25, 27, 28, 29, 30, 40, 41], "readi": [1, 19, 24, 25], "refer": [1, 2, 9, 10, 11, 12, 13, 14, 16, 17, 19, 21, 22, 24, 27, 28, 29, 32, 33, 35, 37, 40, 41], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 10, 12, 13, 14, 19, 21, 24], "local": [1, 9, 27, 29, 30, 39, 40, 41], "preview": 1, "what": [1, 2, 9, 12, 13, 14, 21, 37], "look": [1, 2, 6, 9, 12, 13, 14, 19, 21, 22, 24, 25], "like": [1, 2, 7, 9, 12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 35, 37, 39, 40], "publish": [1, 10, 20], "html": [1, 2, 4, 9, 11, 12, 13, 14, 15, 16, 17, 27, 39, 40, 41], "gener": [1, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "view": [1, 12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 35, 39, 40, 41], "follow": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "python3": [1, 4, 9, 13, 14], "m": [1, 9, 12, 13, 14, 21, 27, 29, 30, 32, 33, 39, 40, 41], "server": [1, 7, 9, 39], "It": [1, 2, 5, 9, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "print": [1, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "serv": [1, 27, 29, 30, 39, 40, 41], "port": [1, 25, 27, 29, 30, 39, 40, 41], "8000": [1, 35], "open": [1, 8, 10, 12, 13, 14, 20, 21, 24, 25], "browser": [1, 5, 7, 27, 29, 30, 39, 40, 41], "go": [1, 9, 19, 21, 24, 27, 29, 30, 39, 40, 41], "read": [2, 9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "code": [2, 3, 4, 8, 12, 13, 14, 19, 24, 25, 27, 28, 32, 33, 35, 37, 40, 41], "style": [2, 3, 8], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 24, 27, 29, 30, 39, 40, 41], "recommend": [2, 9, 19, 21, 22, 24, 25, 27, 40, 41], "test": [2, 8, 10, 11, 18, 19, 21, 22, 24, 25, 28, 29, 32, 33], "valid": [2, 9, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "dataset": [2, 4, 9, 10, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "lhots": [2, 8, 10, 12, 13, 14, 19, 21, 24], "readthedoc": [2, 9], "io": [2, 9, 11, 12, 13, 14, 15, 16, 17, 27, 39, 40, 41], "en": [2, 9, 12], "latest": [2, 7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "index": [2, 9, 11, 12, 13, 14, 15, 16, 17, 39, 40, 41], "yesno": [2, 4, 8, 9, 23, 35, 42], "veri": [2, 3, 12, 13, 14, 21, 32, 33, 35, 40, 41], "good": 2, "exampl": [2, 7, 8, 10, 12, 13, 14, 16, 17, 18, 25, 28, 32, 33, 35], "speech": [2, 7, 8, 9, 11, 20, 21, 35, 42], "pull": [2, 12, 13, 14, 15, 19, 21, 24, 37], "380": [2, 12, 33], "show": [2, 7, 9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "add": [2, 12, 13, 14, 19, 21, 22, 40, 42], "new": [2, 3, 7, 9, 12, 13, 14, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 35, 39, 40, 41], "suppos": [2, 40, 41], "would": [2, 9, 10, 12, 13, 14, 24, 28, 40, 41], "name": [2, 4, 10, 12, 13, 14, 15, 19, 21, 27, 29, 30, 40, 41], "foo": [2, 17, 19, 24, 27, 29, 30, 39, 40, 41], "eg": [2, 4, 6, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "mkdir": [2, 12, 13, 19, 21, 22, 24, 28, 32, 33, 35], "p": [2, 9, 12, 13, 21, 32, 33], "asr": [2, 4, 6, 8, 9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41, 42], "touch": 2, "sh": [2, 9, 10, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "chmod": 2, "x": [2, 14, 37], "simpl": [2, 21], "own": [2, 25, 27, 40, 41], "otherwis": [2, 12, 13, 14, 19, 21, 24, 25, 27, 29, 30, 39, 40, 41], "librispeech": [2, 4, 6, 8, 10, 12, 13, 14, 15, 16, 17, 23, 24, 25, 27, 28, 29, 30, 36, 37, 39, 40, 41, 42], "assum": [2, 9, 10, 12, 13, 14, 15, 19, 21, 22, 24, 25, 27, 28, 32, 33, 35, 39, 40, 41], "fanci": 2, "call": [2, 4, 15, 25], "bar": [2, 17, 19, 24, 27, 29, 30, 39, 40, 41], "organ": 2, "wai": [2, 3, 18, 27, 29, 30, 37, 39, 40, 41], "readm": [2, 19, 21, 22, 24, 28, 32, 33, 35], "md": [2, 6, 10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "asr_datamodul": [2, 4, 9], "pretrain": [2, 10, 12, 13, 14, 15, 17, 19, 21, 22, 24, 28, 32, 33, 35], "For": [2, 4, 6, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "instanc": [2, 4, 6, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "tdnn": [2, 4, 9, 20, 23, 26, 31, 34], "its": [2, 10, 11, 12, 13, 14, 17, 21, 29], "directori": [2, 8, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "structur": [2, 14], "descript": [2, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "contain": [2, 8, 10, 11, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41, 42], "inform": [2, 10, 19, 21, 22, 24, 27, 28, 29, 32, 33, 35, 37, 39, 40, 41], "g": [2, 9, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "wer": [2, 9, 10, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "etc": [2, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "provid": [2, 7, 9, 10, 11, 12, 13, 14, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41, 42], "pytorch": [2, 4, 8, 12, 13, 14, 21], "dataload": [2, 9], "take": [2, 10, 25, 27, 35, 40, 41], "input": [2, 10, 12, 13, 14, 19, 21, 22, 24, 28, 32, 33, 35, 37], "checkpoint": [2, 9, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "save": [2, 9, 10, 13, 14, 16, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "dure": [2, 4, 7, 10, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "stage": [2, 9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "": [2, 9, 10, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "definit": [2, 12, 13], "neural": [2, 19, 24], "network": [2, 19, 21, 24, 27, 29, 30, 39, 40, 41], "script": [2, 8, 9, 17, 18, 19, 21, 22, 24, 25, 28, 32, 33, 35, 39], "infer": [2, 10, 12, 13], "tdnn_lstm_ctc": [2, 22, 28, 33], "conformer_ctc": [2, 19, 24], "get": [2, 7, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 35, 37, 39, 40, 41], "feel": [2, 25, 39], "result": [2, 6, 7, 9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "everi": [2, 10, 27, 29, 30, 39, 40, 41], "kept": [2, 27, 40, 41], "self": [2, 11, 14, 37], "toler": 2, "duplic": 2, "among": [2, 9], "differ": [2, 9, 12, 13, 14, 15, 19, 20, 24, 25, 27, 37, 39, 40, 41], "invoc": [2, 12, 13], "help": [2, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "blob": [2, 6, 10, 17, 27, 29, 30, 39, 40, 41], "master": [2, 6, 10, 13, 14, 16, 17, 21, 25, 27, 29, 30, 39, 40, 41], "transform": [2, 19, 24, 39], "conform": [2, 16, 20, 21, 23, 26, 27, 29, 39, 40, 41], "base": [2, 14, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "lstm": [2, 11, 17, 18, 20, 23, 26, 31, 36, 38], "attent": [2, 14, 21, 22, 25, 37, 40, 41], "lm": [2, 9, 21, 27, 28, 32, 33, 35, 40, 41], "rescor": [2, 22, 28, 30, 32, 33, 35], "demonstr": [2, 5, 7, 10, 15], "consid": [2, 14], "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 12, 13, 14, 15, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "mani": [3, 40, 41], "two": [3, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "them": [3, 5, 6, 7, 9, 12, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "To": [3, 7, 9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "document": [3, 8, 10, 11, 12, 13, 14, 15, 30], "repositori": [3, 12, 13, 14, 15], "recip": [3, 6, 8, 9, 10, 15, 19, 21, 22, 24, 25, 27, 28, 32, 33, 35, 37, 39, 40, 41], "In": [3, 4, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 28, 32, 33, 35, 37], "page": [3, 7, 16, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41, 42], "describ": [3, 5, 10, 12, 13, 15, 16, 17, 18, 19, 21, 22, 24, 27, 28, 32, 33, 40, 41], "how": [3, 5, 7, 8, 9, 12, 13, 14, 15, 18, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "creat": [3, 8, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40], "data": [3, 10, 12, 13, 14, 15, 16, 17, 20], "train": [3, 4, 5, 7, 8, 10, 11, 16, 17, 18, 37], "decod": [3, 4, 7, 12, 13, 14, 17, 18], "model": [3, 5, 7, 8, 9, 11, 25, 37], "section": [4, 5, 9, 10, 15, 16, 17, 18, 19, 24], "collect": [4, 9], "user": [4, 9], "post": 4, "correspond": [4, 6, 7], "solut": 4, "One": 4, "torch": [4, 8, 9, 10, 11, 18, 19, 21, 24], "torchaudio": [4, 8, 37], "cu111": 4, "torchvis": 4, "11": [4, 9, 12, 13, 15, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "f": [4, 9, 32, 33], "download": [4, 7, 8, 11, 18, 20, 25], "org": [4, 9, 20, 21, 27, 39, 40, 41], "whl": [4, 9], "torch_stabl": [4, 9], "throw": [4, 12, 13, 14], "error": [4, 9, 12, 13, 14, 24], "when": [4, 7, 12, 13, 14, 18, 21, 24, 25, 27, 29, 30, 40, 41], "specifi": [4, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "cuda": [4, 8, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 39, 40, 41], "while": [4, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "That": [4, 12, 13, 25, 27, 39, 40, 41], "cu11": 4, "therefor": 4, "correct": 4, "log": [4, 9, 12, 13, 14, 28, 32, 33, 35], "traceback": 4, "most": [4, 40, 41], "recent": [4, 12, 13, 14], "last": 4, "line": [4, 9, 12, 13, 14, 27, 40, 41], "14": [4, 9, 10, 12, 13, 16, 19, 24, 27, 28, 29, 32, 39, 40, 41], "from": [4, 5, 7, 9, 10, 12, 13, 14, 15, 19, 20, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "yesnoasrdatamodul": 4, "home": [4, 12, 13, 19, 24], "xxx": [4, 10, 12, 13, 14], "next": [4, 7, 9, 12, 13, 14, 24, 25, 27, 28, 29, 30, 39, 40, 41], "gen": [4, 7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "kaldi": [4, 7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "34": [4, 9, 12, 13], "datamodul": 4, "__init__": [4, 10, 12, 13, 14, 19, 21, 24], "23": [4, 9, 12, 13, 14, 19, 21, 22, 24, 32, 33, 35], "util": [4, 9, 24], "add_eo": 4, "add_so": 4, "get_text": 4, "39": [4, 9, 12, 14, 21, 24, 28, 32], "tensorboard": [4, 9, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "summarywrit": 4, "miniconda3": 4, "env": 4, "yyi": 4, "lib": [4, 9, 14], "8": [4, 9, 10, 12, 13, 14, 19, 21, 24, 25, 27, 28, 29, 30, 35, 39, 40, 41], "site": [4, 9, 14], "packag": [4, 9, 14], "loosevers": 4, "uninstal": 4, "setuptool": [4, 9], "58": [4, 24], "conda": [4, 9], "encount": [4, 9, 14, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "dev": [4, 9, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "yangyifan": 4, "anaconda3": 4, "dev20230112": 4, "cuda11": 4, "6": [4, 9, 11, 18, 19, 21, 24, 27, 28, 32, 33, 39], "torch1": [4, 9], "13": [4, 9, 10, 12, 13, 14, 21, 22, 24, 28, 29, 32], "py3": [4, 9], "linux": [4, 7, 11, 12, 13, 14, 15], "x86_64": [4, 12], "egg": 4, "24": [4, 9, 12, 13, 22, 28, 32, 33, 35], "_k2": [4, 9], "determinizeweightpushingtyp": 4, "handl": [4, 19, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "except": [4, 10], "anoth": 4, "occur": 4, "pruned_transducer_stateless7_ctc_b": [4, 29], "104": 4, "30": [4, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40, 41], "rais": 4, "note": [4, 10, 12, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "re": [4, 19, 22, 24, 25, 27, 29, 30, 37, 39, 40, 41], "anaconda": 4, "maco": [4, 7, 11, 12, 13, 14, 15], "probabl": [4, 9, 21, 27, 29, 39, 40, 41], "variabl": [4, 9, 12, 13, 14, 19, 22, 24, 25, 27, 29, 30, 39, 40, 41], "export": [4, 8, 9, 19, 21, 22, 24, 25, 28, 32, 33, 35], "dyld_library_path": 4, "conda_prefix": 4, "first": [4, 9, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "try": [4, 5, 7, 25, 27, 29, 30, 39, 40, 41], "find": [4, 5, 6, 7, 9, 10, 12, 13, 14, 17, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "where": [4, 40], "locat": [4, 12], "libpython": 4, "abl": 4, "insid": [4, 17], "codna_prefix": 4, "ld_library_path": 4, "also": [5, 6, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21, 22, 24, 27, 29, 30, 35, 37, 39, 40, 41], "within": [5, 7, 12, 13], "anyth": [5, 7], "space": [5, 8], "youtub": [5, 8, 24, 25, 27, 28, 29, 30, 39, 40, 41], "video": [5, 8, 24, 25, 27, 28, 29, 30, 39, 40, 41], "upload": [6, 7, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "huggingfac": [6, 8, 10, 12, 13, 14, 15, 19, 21, 22, 24, 28, 29, 30, 32, 33, 35, 39], "co": [6, 7, 10, 12, 13, 14, 15, 19, 20, 21, 22, 24, 28, 29, 30, 32, 33, 35, 39], "visit": [6, 7, 27, 29, 30, 39, 40, 41], "link": [6, 9, 10, 11, 27, 29, 30, 39, 40, 41], "search": [6, 7], "specif": [6, 15, 21], "aishel": [6, 8, 19, 21, 22, 23, 42], "gigaspeech": [6, 16, 39], "wenetspeech": [6, 16], "integr": 7, "framework": [7, 27, 40], "sherpa": [7, 11, 16, 17, 18, 39], "need": [7, 9, 10, 11, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "window": [7, 11, 12, 13, 14, 15], "even": [7, 9, 13], "ipad": 7, "phone": 7, "start": [7, 9, 10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "address": [7, 10, 12, 13, 14, 21, 27, 30, 39, 40, 41], "recognit": [7, 8, 11, 12, 13, 20, 21, 35, 42], "screenshot": [7, 19, 21, 22, 24, 25, 27, 35, 39, 40], "select": [7, 12, 13, 14, 27, 28, 32, 33, 35, 39, 40, 41], "languag": [7, 19, 21, 22], "current": [7, 9, 12, 13, 21, 25, 37, 39, 40, 41, 42], "chines": [7, 20, 21], "english": [7, 35, 39], "target": 7, "method": [7, 9, 10, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 39, 40, 41], "greedi": 7, "modified_beam_search": [7, 21, 25, 27, 29, 39, 40, 41], "choos": [7, 9, 25, 27, 29, 30, 39, 40, 41], "number": [7, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "activ": 7, "path": [7, 10, 12, 13, 14, 17, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "either": [7, 19, 21, 22, 24, 40, 41], "record": [7, 13, 14, 19, 20, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "click": [7, 9, 19, 21, 22, 24, 27, 29, 30, 35, 39, 40], "button": 7, "submit": 7, "wait": 7, "moment": 7, "an": [7, 9, 10, 12, 13, 14, 15, 16, 17, 19, 20, 21, 24, 25, 27, 30, 35, 39, 40, 41], "bottom": [7, 27, 29, 30, 39, 40, 41], "part": [7, 9, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "tabl": [7, 12, 13, 14], "one": [7, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "subscrib": [7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "channel": [7, 9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "nadira": [7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "povei": [7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "www": [7, 9, 20, 24, 25, 27, 28, 29, 30, 39, 40, 41], "uc_vaumpkminz1pnkfxan9mw": [7, 9, 24, 25, 27, 28, 29, 30, 39, 40, 41], "toolkit": 8, "cudnn": 8, "2": [8, 10, 11, 18, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "frequent": 8, "ask": 8, "question": 8, "faq": 8, "oserror": 8, "libtorch_hip": 8, "cannot": [8, 12, 13, 14], "share": [8, 9], "object": [8, 9, 19, 21, 22, 27, 35, 39, 40], "attributeerror": 8, "modul": [8, 9, 12, 14, 29, 40], "distutil": 8, "attribut": [8, 14, 24], "importerror": 8, "libpython3": 8, "No": [8, 12, 13, 14, 35], "state_dict": [8, 18, 19, 21, 22, 24, 28, 32, 33, 35], "jit": [8, 11, 18, 24], "trace": [8, 11, 16, 18], "onnx": [8, 10, 18], "ncnn": [8, 18], "non": [8, 24, 37, 40, 42], "stream": [8, 11, 12, 13, 15, 18, 19, 24, 32, 33, 39, 42], "timit": [8, 23, 32, 33, 42], "introduct": [8, 36, 42], "contribut": 8, "depend": [9, 19, 24], "step": [9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40, 41], "99": [9, 12, 13, 14, 15], "who": 9, "about": [9, 12, 13, 14, 21, 25, 27, 30, 39, 40, 41], "suggest": [9, 27, 29, 30, 39, 40, 41], "virut": 9, "venv": 9, "my_env": 9, "sourc": [9, 10, 12, 13, 14, 19, 20, 21, 24], "bin": [9, 12, 13, 14, 19, 24], "order": [9, 12, 13, 14, 19, 22, 24, 28, 32, 33], "matter": [9, 12], "compil": [9, 12, 13, 19, 21, 24], "wheel": [9, 12], "same": [9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "don": [9, 12, 13, 14, 16, 19, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "t": [9, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "from_sourc": 9, "for_develop": 9, "alwai": [9, 10], "strongli": 9, "pythonpath": [9, 12, 13, 14], "point": [9, 10, 19, 22, 24, 25, 27, 29, 30, 39, 40, 41], "folder": [9, 10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "tmp": [9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "setup": [9, 12, 19, 21, 22, 24, 25, 27, 28, 32, 33, 35, 40, 41], "put": [9, 12, 13, 29, 40], "sever": [9, 10, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "switch": [9, 19, 24, 30], "just": [9, 12, 13, 14, 37], "virtualenv": 9, "cpython3": 9, "final": [9, 10, 12, 13, 24, 28], "64": [9, 10, 12, 21, 40], "1540m": 9, "creator": 9, "cpython3posix": 9, "dest": 9, "ceph": [9, 10, 19, 21, 24], "fj": [9, 10, 12, 13, 14, 21, 24], "fangjun": [9, 10, 12, 13, 14, 21, 24], "clear": 9, "fals": [9, 10, 12, 13, 14, 19, 21, 24, 25], "no_vcs_ignor": 9, "global": 9, "seeder": 9, "fromappdata": 9, "bundl": 9, "via": [9, 11, 16, 17, 18], "copi": [9, 37], "app_data_dir": 9, "root": [9, 12, 13, 14], "v": [9, 12, 13, 14, 24, 32, 33], "irtualenv": 9, "ad": [9, 12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 35, 37, 39, 40, 41], "seed": 9, "21": [9, 10, 12, 19, 21, 24, 32, 33], "57": [9, 13, 24, 28], "36": [9, 12, 21, 24, 25], "bashactiv": 9, "cshellactiv": 9, "fishactiv": 9, "powershellactiv": 9, "pythonactiv": 9, "xonshactiv": 9, "dev20210822": 9, "cpu": [9, 10, 12, 13, 14, 16, 19, 27, 29, 30, 35, 40, 41], "9": [9, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 35, 39, 40, 41], "nightli": 9, "2bcpu": 9, "cp38": 9, "linux_x86_64": 9, "mb": [9, 12, 13, 14], "________________________________": 9, "185": [9, 19, 24, 35], "kb": [9, 12, 13, 14, 32, 33], "graphviz": 9, "17": [9, 10, 12, 13, 14, 19, 24, 32, 33, 39], "none": [9, 19, 24], "18": [9, 12, 13, 14, 19, 21, 22, 24, 27, 28, 32, 33, 39, 40, 41], "cach": [9, 14], "manylinux1_x86_64": 9, "831": [9, 21, 33], "type": [9, 10, 12, 13, 14, 19, 21, 24, 27, 29, 30, 35, 37, 39, 40, 41], "extens": 9, "typing_extens": 9, "26": [9, 12, 13, 14, 21, 24, 33], "successfulli": [9, 12, 13, 14], "req": 9, "7b1b76ge": 9, "q": 9, "audioread": 9, "soundfil": 9, "post1": 9, "py2": 9, "7": [9, 10, 11, 14, 18, 19, 22, 24, 27, 28, 32, 33, 39, 40], "97": [9, 12, 19], "cytoolz": 9, "manylinux_2_17_x86_64": 9, "manylinux2014_x86_64": 9, "dataclass": 9, "h5py": 9, "manylinux_2_12_x86_64": 9, "manylinux2010_x86_64": 9, "684": [9, 19, 35], "intervaltre": 9, "lilcom": 9, "numpi": 9, "15": [9, 10, 12, 13, 21, 22, 24, 32, 35], "40": [9, 12, 13, 14, 22, 24, 28, 32, 33], "pyyaml": 9, "662": 9, "tqdm": 9, "62": [9, 24, 28], "76": [9, 35], "73": 9, "alreadi": [9, 10], "satisfi": 9, "2a1410b": 9, "clean": [9, 14, 19, 21, 24, 25, 27, 28, 29, 30, 39, 40, 41], "toolz": 9, "55": [9, 12, 22, 24, 32], "sortedcontain": 9, "29": [9, 14, 15, 19, 21, 22, 24, 28, 29, 32, 33], "cffi": 9, "411": [9, 14, 24], "pycpars": 9, "20": [9, 10, 12, 14, 19, 21, 22, 24, 27, 28, 32, 33, 35, 40], "112": [9, 12, 13, 14], "pypars": 9, "67": 9, "done": [9, 10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "filenam": [9, 12, 13, 14, 15, 16, 17, 29, 30, 39, 41], "dev_2a1410b_clean": 9, "size": [9, 10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "342242": 9, "sha256": 9, "f683444afa4dc0881133206b4646a": 9, "9d0f774224cc84000f55d0a67f6e4a37997": 9, "store": [9, 24], "ephem": 9, "ftu0qysz": 9, "7f": 9, "7a": 9, "8e": 9, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 9, "warn": 9, "built": 9, "invalid": [9, 24], "metadata": [9, 32, 33], "mandat": 9, "pep": 9, "440": 9, "packa": 9, "ging": 9, "deprec": [9, 21], "legaci": 9, "becaus": 9, "could": [9, 12, 13, 14, 19, 22], "A": [9, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 39, 40, 41], "replac": [9, 12, 13], "discuss": 9, "regard": 9, "pypa": 9, "sue": 9, "8368": 9, "inter": 9, "valtre": 9, "sor": 9, "tedcontain": 9, "remot": 9, "enumer": 9, "500": [9, 10, 12, 13, 14, 21, 24, 30, 39], "count": 9, "100": [9, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "compress": 9, "308": [9, 19, 21, 22], "total": [9, 13, 14, 19, 21, 22, 24, 25, 27, 28, 35, 39, 40], "delta": 9, "263": [9, 13], "reus": 9, "307": 9, "102": [9, 14, 19], "pack": [9, 40, 41], "receiv": 9, "172": 9, "49": [9, 12, 13, 24, 33, 35], "kib": 9, "385": 9, "00": [9, 12, 19, 21, 22, 24, 28, 32, 33, 35], "resolv": 9, "kaldilm": 9, "tar": 9, "gz": 9, "48": [9, 12, 13, 19, 21], "574": 9, "kaldialign": 9, "sentencepiec": [9, 24], "96": 9, "41": [9, 12, 14, 19, 21, 32, 35], "absl": 9, "absl_pi": 9, "132": 9, "googl": [9, 27, 29, 30, 39, 40, 41], "auth": 9, "oauthlib": 9, "google_auth_oauthlib": 9, "grpcio": 9, "ment": 9, "12": [9, 10, 12, 13, 14, 15, 19, 21, 22, 24, 27, 29, 30, 32, 35, 39, 40, 41], "requi": 9, "rement": 9, "protobuf": 9, "manylinux_2_5_x86_64": 9, "werkzeug": 9, "288": 9, "tensorboard_data_serv": 9, "google_auth": 9, "35": [9, 10, 12, 13, 14, 21, 24, 39], "152": 9, "request": [9, 37], "plugin": 9, "wit": 9, "tensorboard_plugin_wit": 9, "781": 9, "markdown": 9, "six": 9, "16": [9, 10, 12, 13, 14, 17, 19, 21, 22, 24, 27, 28, 32, 33, 35, 39, 40, 41], "cachetool": 9, "rsa": 9, "pyasn1": 9, "pyasn1_modul": 9, "155": 9, "requests_oauthlib": 9, "77": [9, 24], "urllib3": 9, "27": [9, 12, 13, 14, 19, 21, 28, 33], "138": [9, 19, 21], "certifi": 9, "2017": 9, "2021": [9, 19, 22, 24, 28, 32, 33, 35], "145": 9, "charset": 9, "normal": [9, 28, 32, 33, 35, 40], "charset_norm": 9, "idna": 9, "59": [9, 12, 22, 24], "146": 9, "897233": 9, "eccb906cafcd45bf9a7e1a1718e4534254bfb": 9, "f4c0d0cbc66eee6c88d68a63862": 9, "85": 9, "7d": 9, "63": [9, 21], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 9, "etool": 9, "oaut": 9, "hlib": 9, "let": [9, 12, 13, 14, 19, 24], "u": [9, 12, 13, 14, 19, 21, 22, 24, 25, 35], "08": [9, 14, 24, 28, 30, 32, 33, 35, 39], "19": [9, 10, 12, 13, 14, 19, 24, 28, 32, 33], "main": [9, 19, 24, 37], "dl_dir": [9, 19, 22, 24, 25, 27, 29, 30, 39, 40, 41], "waves_yesno": 9, "49mb": 9, "03": [9, 10, 13, 21, 24, 32, 33, 39], "39mb": 9, "manifest": [9, 25], "31": [9, 12, 13, 14, 24], "42": [9, 13, 19, 24, 35], "comput": [9, 10, 12, 13, 14, 19, 21, 22, 25, 27, 28, 30, 32, 33, 35, 39, 40, 41], "fbank": [9, 10, 12, 13, 14, 19, 21, 22, 24, 28, 32, 33, 35], "32": [9, 12, 13, 14, 15, 19, 21, 22, 41], "803": 9, "info": [9, 10, 12, 13, 14, 19, 21, 22, 24, 28, 32, 33, 35], "compute_fbank_yesno": 9, "52": [9, 19, 24], "process": [9, 10, 12, 13, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "extract": [9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "featur": [9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "_______________________________________________________________": 9, "90": [9, 12], "01": [9, 12, 21, 22, 24, 25, 29], "80": [9, 10, 12, 13, 14, 19, 21, 24], "57it": 9, "085": 9, "______________________________________________________________": 9, "248": [9, 21], "21it": 9, "lang": [9, 10, 21, 24, 30], "fcordre9": 9, "kaldilm_6899d26f2d684ad48f21025950cd2866": 9, "csrc": [9, 24], "arpa_file_pars": 9, "cc": 9, "void": 9, "arpafilepars": 9, "rea": 9, "d": [9, 32, 33], "std": 9, "istream": 9, "79": 9, "140": [9, 22], "gram": [9, 19, 21, 22, 27, 28, 30, 32, 33, 40, 41], "89": [9, 19], "hlg": [9, 28, 32, 33, 35], "928": 9, "compile_hlg": 9, "120": 9, "lang_phon": [9, 22, 28, 32, 33, 35], "929": [9, 21], "lexicon": [9, 19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40, 41], "116": 9, "convert": [9, 12, 13, 14, 24], "l": [9, 12, 13, 14, 21, 32, 33, 35], "pt": [9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "linv": [9, 21, 24, 35], "931": 9, "ctc_topo": 9, "max_token_id": 9, "932": 9, "load": [9, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "fst": [9, 21, 35], "intersect": [9, 27, 40, 41], "933": 9, "lg": [9, 27, 30, 40, 41], "shape": [9, 14], "66": [9, 13], "connect": [9, 10, 24, 27, 28, 39, 40, 41], "68": [9, 24], "70": 9, "class": [9, 24], "tensor": [9, 13, 14, 19, 21, 22, 24, 27, 35, 39, 40], "71": [9, 24, 28], "determin": 9, "934": 9, "74": [9, 10], "raggedint": 9, "remov": [9, 19, 21, 22, 24, 28, 32, 33], "disambigu": 9, "symbol": [9, 21, 27, 40, 41], "87": [9, 12], "remove_epsilon": 9, "935": 9, "92": [9, 24], "arc": 9, "95": [9, 20], "compos": 9, "h": 9, "105": [9, 24], "936": 9, "107": [9, 13, 28], "123": 9, "now": [9, 12, 13, 14, 19, 24, 25, 27, 28, 29, 30, 32, 33, 39, 40, 41], "cuda_visible_devic": [9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "gpu": [9, 12, 13, 19, 21, 22, 24, 25, 27, 29, 30, 32, 33, 35, 39, 40, 41], "avail": [9, 10, 12, 13, 14, 19, 21, 24, 28, 32, 33, 35, 39], "case": [9, 10, 12, 13, 14, 27, 29, 30, 39, 40, 41], "segment": 9, "fault": 9, "core": 9, "dump": 9, "protocol_buffers_python_implement": 9, "more": [9, 12, 13, 14, 19, 24, 25, 35, 37, 39, 40], "674": 9, "interest": [9, 25, 27, 29, 30, 39, 40, 41], "given": [9, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 40, 41], "below": [9, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40], "072": 9, "465": 9, "466": 9, "exp_dir": [9, 12, 13, 14, 21, 24, 25, 27, 29, 30, 40, 41], "posixpath": [9, 12, 13, 14, 21, 24], "exp": [9, 10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "lang_dir": [9, 21, 24], "lr": [9, 21, 39], "feature_dim": [9, 10, 12, 13, 14, 19, 21, 24, 35], "weight_decai": 9, "1e": 9, "06": [9, 10, 12, 22, 24, 28, 35], "start_epoch": 9, "best_train_loss": [9, 10, 12, 13, 14], "inf": [9, 10, 12, 13, 14], "best_valid_loss": [9, 10, 12, 13, 14], "best_train_epoch": [9, 10, 12, 13, 14], "best_valid_epoch": [9, 10, 13, 14], "batch_idx_train": [9, 10, 12, 13, 14], "log_interv": [9, 10, 12, 13, 14], "valid_interv": [9, 10, 12, 13, 14], "beam_siz": [9, 10, 21], "reduct": [9, 12, 13, 29], "sum": 9, "use_doub": 9, "le_scor": 9, "true": [9, 10, 12, 13, 14, 19, 21, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "world_siz": [9, 25], "master_port": 9, "12354": 9, "num_epoch": 9, "feature_dir": [9, 24], "max_dur": [9, 24], "bucketing_sampl": [9, 24], "num_bucket": [9, 24], "concatenate_cut": [9, 24], "duration_factor": [9, 24], "gap": [9, 24], "on_the_fly_feat": [9, 24], "shuffl": [9, 24], "return_cut": [9, 24], "num_work": [9, 24], "074": 9, "113": [9, 21, 24], "098": [9, 28], "cut": [9, 24], "240": [9, 19, 35], "149": [9, 12, 24], "200": [9, 10, 12, 13, 14, 19, 24, 25, 32, 33, 35], "singlecutsampl": 9, "206": [9, 24], "219": [9, 21, 24], "246": [9, 14, 21, 24, 32, 33], "357": [9, 14], "416": 9, "epoch": [9, 10, 12, 13, 14, 15, 16, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "batch": [9, 12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "avg": [9, 10, 12, 13, 14, 15, 16, 17, 21, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "loss": [9, 12, 13, 19, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "0789": 9, "848": 9, "5356": 9, "7556": 9, "301": [9, 10, 24], "432": [9, 24], "9972": 9, "best": [9, 12, 13, 14, 19, 22, 24], "805": 9, "2436": 9, "5717": 9, "33": [9, 12, 13, 19, 20, 21, 24, 32], "109": [9, 19, 24], "4167": 9, "121": [9, 28], "325": [9, 13], "2214": 9, "798": [9, 21], "0781": 9, "1343": 9, "065": 9, "0859": 9, "556": 9, "0421": 9, "0975": 9, "810": 9, "0431": 9, "824": 9, "657": 9, "0109": 9, "984": [9, 24], "0093": 9, "0096": 9, "50": [9, 10, 12, 13, 14, 24, 27, 32, 39, 40, 41], "239": [9, 13, 21], "0104": 9, "0101": 9, "569": 9, "0092": 9, "819": [9, 32], "835": 9, "51": [9, 12, 19, 24, 35], "024": 9, "0105": 9, "317": [9, 13], "0099": 9, "0097": 9, "552": 9, "0108": 9, "869": 9, "0102": 9, "126": [9, 24], "128": [9, 24], "537": [9, 24], "192": [9, 14, 24], "249": [9, 13], "250": [9, 21, 28], "lm_dir": [9, 24], "search_beam": [9, 19, 24, 35], "output_beam": [9, 19, 24, 35], "min_active_st": [9, 19, 24, 35], "max_active_st": [9, 19, 24, 35], "10000": [9, 19, 24, 35], "use_double_scor": [9, 19, 24, 35], "193": 9, "213": [9, 35], "259": [9, 13, 19], "devic": [9, 10, 12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 40, 41], "217": [9, 19, 24], "279": [9, 24], "averag": [9, 10, 12, 13, 14, 15, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "userwarn": [9, 21], "floor_divid": 9, "futur": [9, 13, 14, 21, 42], "round": [9, 21], "toward": [9, 21], "trunc": [9, 21], "function": [9, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "NOT": [9, 19, 21, 24, 35], "floor": [9, 21], "incorrect": [9, 13, 14, 21], "neg": [9, 21], "valu": [9, 12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "keep": [9, 21, 27, 40, 41], "behavior": [9, 14, 21], "div": [9, 21], "b": [9, 21, 24, 32, 33], "rounding_mod": [9, 21], "actual": [9, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "divis": [9, 21], "trigger": 9, "intern": 9, "aten": 9, "src": [9, 12, 14], "nativ": 9, "binaryop": 9, "cpp": [9, 12, 16], "450": [9, 19, 21, 22], "k": [9, 14, 27, 32, 33, 39, 40, 41], "n": [9, 19, 25, 27, 29, 30, 32, 33, 39, 40, 41], "220": [9, 12, 21, 22, 24], "409": 9, "190": [9, 28], "until": [9, 24, 29], "571": [9, 24], "228": [9, 24], "transcript": [9, 19, 20, 21, 22, 24, 27, 28, 32, 33, 39, 40, 41], "recog": [9, 21, 24], "test_set": [9, 35], "572": 9, "ins": [9, 24, 35], "del": [9, 24, 35], "sub": [9, 24, 35], "573": 9, "236": 9, "wrote": [9, 24], "detail": [9, 11, 15, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "stat": [9, 24], "err": [9, 21, 24], "299": 9, "congratul": [9, 12, 13, 14, 19, 22, 24, 28, 32, 33, 35], "fun": [9, 12, 13], "debug": 9, "variou": [9, 15, 18, 42], "problem": [9, 25], "mai": [9, 12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41, 42], "period": [10, 12], "disk": 10, "optim": [10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "other": [10, 13, 14, 15, 21, 24, 25, 27, 28, 32, 33, 35, 37, 40, 41, 42], "relat": [10, 19, 21, 24, 28, 32, 33, 35], "resum": [10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "howev": [10, 13, 25], "onli": [10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41, 42], "strip": 10, "reduc": [10, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "each": [10, 12, 13, 15, 19, 21, 22, 24, 27, 29, 30, 37, 39, 40, 41], "well": [10, 35, 42], "usag": [10, 12, 13, 14, 16, 17, 28, 32, 33, 35], "pruned_transducer_stateless3": [10, 16, 37], "almost": [10, 27, 37, 40, 41], "dir": [10, 12, 13, 14, 15, 16, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "bpe": [10, 12, 13, 14, 15, 16, 17, 24, 27, 29, 30, 39, 40, 41], "lang_bpe_500": [10, 12, 13, 14, 15, 16, 17, 24, 27, 29, 30, 39, 40, 41], "dict": [10, 14], "host": 10, "csukuangfj": [10, 12, 13, 15, 19, 21, 22, 24, 28, 32, 33, 35, 39], "prune": [10, 14, 15, 21, 23, 25, 26, 36, 37, 38, 39, 41], "transduc": [10, 11, 15, 18, 20, 23, 25, 26, 36, 37, 38], "stateless3": [10, 12], "2022": [10, 12, 13, 14, 15, 21, 27, 29, 30, 39, 40], "05": [10, 12, 13, 19, 21, 22, 24, 33], "lf": [10, 12, 13, 14, 15, 19, 21, 22, 24, 28, 30, 32, 33, 35], "repo": [10, 15], "prefix": 10, "those": 10, "wave": [10, 12, 13, 14, 19, 24], "iter": [10, 12, 13, 14, 17, 27, 29, 30, 39, 40, 41], "1224000": 10, "greedy_search": [10, 21, 27, 29, 39, 40, 41], "test_wav": [10, 12, 13, 14, 15, 19, 21, 22, 24, 28, 32, 33, 35], "1089": [10, 12, 13, 14, 15, 24, 28], "134686": [10, 12, 13, 14, 15, 24, 28], "0001": [10, 12, 13, 14, 15, 24, 28], "wav": [10, 12, 13, 14, 15, 17, 19, 21, 22, 24, 27, 29, 30, 32, 33, 35, 39, 40, 41], "1221": [10, 12, 13, 24, 28], "135766": [10, 12, 13, 24, 28], "0002": [10, 12, 13, 24, 28], "multipl": [10, 19, 21, 22, 24, 28, 32, 33, 35], "sound": [10, 12, 13, 14, 17, 18, 19, 21, 22, 24, 28, 32, 33, 35], "Its": [10, 12, 13, 14, 24], "output": [10, 12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "09": [10, 13, 19, 21, 22, 24, 39], "02": [10, 12, 13, 14, 21, 24, 27, 33, 39, 40], "233": [10, 12, 13], "265": 10, "reset_interv": [10, 12, 13, 14], "3000": [10, 12, 13, 14], "subsampling_factor": [10, 13, 14, 19, 21, 24], "encoder_dim": [10, 12, 13, 14], "512": [10, 12, 13, 14, 19, 21, 24], "nhead": [10, 12, 14, 19, 21, 24, 27, 40], "dim_feedforward": [10, 12, 13, 21], "2048": [10, 12, 13, 14, 21], "num_encoder_lay": [10, 12, 13, 14, 21], "decoder_dim": [10, 12, 13, 14], "joiner_dim": [10, 12, 13, 14], "model_warm_step": [10, 12, 13], "env_info": [10, 12, 13, 14, 19, 21, 24], "releas": [10, 12, 13, 14, 19, 21, 24], "sha1": [10, 12, 13, 14, 19, 21, 24], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 10, "date": [10, 12, 13, 14, 19, 21, 24], "fri": 10, "oct": [10, 24], "miss": [10, 12, 13, 14, 21, 24], "cu102": [10, 12, 13, 14], "branch": [10, 12, 13, 14, 19, 21, 24, 29], "1013": 10, "c39cba5": 10, "dirti": [10, 12, 13, 19, 24], "thu": [10, 12, 13, 14, 21, 24, 28], "jsonl": 10, "hostnam": [10, 12, 13, 14, 21], "de": [10, 12, 13, 14, 21], "74279": [10, 12, 13, 14, 21], "0324160024": 10, "65bfd8b584": 10, "jjlbn": 10, "ip": [10, 12, 13, 14, 21], "177": [10, 13, 14, 21, 22, 24], "203": [10, 24], "bpe_model": [10, 12, 13, 14, 24], "sound_fil": [10, 19, 21, 24, 35], "sample_r": [10, 19, 21, 24, 35], "16000": [10, 19, 21, 22, 24, 28, 29, 32, 33], "beam": [10, 39], "max_context": 10, "max_stat": 10, "context_s": [10, 12, 13, 14, 21], "max_sym_per_fram": [10, 21], "simulate_stream": 10, "decode_chunk_s": 10, "left_context": 10, "dynamic_chunk_train": 10, "causal_convolut": 10, "short_chunk_s": [10, 14, 40, 41], "25": [10, 12, 13, 19, 24, 27, 32, 33, 35, 40], "num_left_chunk": [10, 14], "blank_id": [10, 12, 13, 14, 21], "unk_id": 10, "vocab_s": [10, 12, 13, 14, 21], "271": [10, 13], "273": [10, 21], "612": 10, "458": 10, "disabl": [10, 12, 13], "giga": [10, 13, 39], "623": 10, "277": 10, "paramet": [10, 12, 13, 14, 16, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 39, 40, 41], "78648040": 10, "951": [10, 24], "285": [10, 21, 24], "construct": [10, 12, 13, 14, 19, 21, 22, 24, 28, 32, 33, 35], "952": 10, "295": [10, 19, 21, 22, 24], "957": 10, "700": 10, "329": [10, 13, 24], "912": 10, "388": 10, "earli": [10, 12, 13, 14, 24, 28], "nightfal": [10, 12, 13, 14, 24, 28], "THE": [10, 12, 13, 14, 24, 28], "yellow": [10, 12, 13, 14, 24, 28], "lamp": [10, 12, 13, 14, 24, 28], "light": [10, 12, 13, 14, 24, 28], "here": [10, 12, 13, 14, 19, 21, 22, 24, 25, 28, 37, 40], "AND": [10, 12, 13, 14, 24, 28], "THERE": [10, 12, 13, 14, 24, 28], "squalid": [10, 12, 13, 14, 24, 28], "quarter": [10, 12, 13, 14, 24, 28], "OF": [10, 12, 13, 14, 24, 28], "brothel": [10, 12, 13, 14, 24, 28], "god": [10, 24, 28], "AS": [10, 24, 28], "direct": [10, 24, 28], "consequ": [10, 24, 28], "sin": [10, 24, 28], "man": [10, 24, 28], "punish": [10, 24, 28], "had": [10, 24, 28], "her": [10, 24, 28], "love": [10, 24, 28], "child": [10, 24, 28], "whose": [10, 21, 24, 28], "ON": [10, 12, 24, 28], "THAT": [10, 24, 28], "dishonor": [10, 24, 28], "bosom": [10, 24, 28], "TO": [10, 24, 28], "parent": [10, 24, 28], "forev": [10, 24, 28], "WITH": [10, 24, 28], "race": [10, 24, 28], "descent": [10, 24, 28], "mortal": [10, 24, 28], "BE": [10, 24, 28], "bless": [10, 24, 28], "soul": [10, 24, 28], "IN": [10, 24, 28], "heaven": [10, 24, 28], "yet": [10, 12, 13, 24, 28], "THESE": [10, 24, 28], "thought": [10, 24, 28], "affect": [10, 24, 28], "hester": [10, 24, 28], "prynn": [10, 24, 28], "less": [10, 24, 28, 35, 40, 41], "hope": [10, 20, 24, 28], "than": [10, 13, 19, 21, 22, 24, 27, 28, 29, 30, 35, 39, 40, 41], "apprehens": [10, 24, 28], "390": 10, "down": [10, 19, 24, 27, 29, 30, 39, 40, 41], "reproduc": [10, 24], "ln": [10, 12, 13, 14, 15, 19, 24, 27, 29, 30, 39, 40, 41], "9999": [10, 29, 30, 39], "symlink": 10, "pass": [10, 14, 19, 21, 22, 24, 27, 29, 30, 37, 39, 40, 41], "max": [10, 12, 13, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "durat": [10, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "600": [10, 24, 27, 29, 39, 40, 41], "reason": [10, 12, 13, 14, 40], "support": [11, 12, 13, 14, 19, 21, 24, 27, 29, 30, 37, 39, 40, 41], "zipform": [11, 15, 18, 23, 26, 36, 38], "convemform": [11, 18, 37], "perform": [11, 21, 25, 40], "platform": [11, 15], "android": [11, 12, 13, 14, 15], "raspberri": [11, 15], "pi": [11, 15], "\u7231\u82af\u6d3e": 11, "maix": 11, "iii": 11, "axera": 11, "rv1126": 11, "static": 11, "produc": [11, 27, 29, 30, 39, 40, 41], "binari": [11, 12, 13, 14, 19, 21, 22, 24, 27, 35, 39, 40], "everyth": 11, "pnnx": [11, 18], "torchscript": [11, 16, 17, 18], "encod": [11, 15, 17, 18, 19, 21, 22, 24, 27, 28, 29, 35, 37, 39, 40, 41], "option": [11, 15, 18, 21, 25, 28, 32, 33, 35], "int8": [11, 18], "quantiz": [11, 18, 25], "zengwei": [12, 14, 15, 30, 39], "conv": [12, 13], "emform": [12, 13, 16], "stateless2": [12, 13, 39], "07": [12, 13, 14, 19, 21, 22, 24], "ubuntu": [12, 13, 14], "04": [12, 13, 14, 19, 21, 22, 24, 28, 32, 33], "work": [12, 13, 14, 24], "pretrained_model": [12, 13, 14], "online_transduc": 12, "continu": [12, 13, 14, 15, 19, 21, 22, 24, 27, 29, 30, 35, 39, 40], "git_lfs_skip_smudg": [12, 13, 14, 15], "includ": [12, 13, 14, 15, 27, 29, 30, 39, 40, 41], "jit_xxx": [12, 13, 14], "anywher": [12, 13], "submodul": 12, "updat": [12, 13, 14], "recurs": 12, "init": 12, "cmake": [12, 13, 19, 24], "dcmake_build_typ": [12, 19, 24], "dncnn_python": 12, "dncnn_build_benchmark": 12, "off": 12, "dncnn_build_exampl": 12, "dncnn_build_tool": 12, "j4": 12, "pwd": 12, "compon": [12, 37], "execut": [12, 19, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "ncnn2int8": [12, 13], "our": [12, 13, 14, 16, 17, 24, 25, 27, 37, 40, 41], "cpython": 12, "38": [12, 19, 21, 24, 32], "gnu": 12, "am": 12, "sai": [12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "But": [12, 27, 29, 30, 39, 40, 41], "doe": [12, 13, 14, 19, 21, 24, 35], "As": [12, 21, 24, 25], "long": 12, "later": [12, 13, 14, 19, 22, 24, 27, 28, 29, 30, 32, 33, 39, 40, 41], "termin": 12, "tencent": [12, 13], "made": 12, "modif": [12, 21], "offic": 12, "synchron": 12, "offici": 12, "renam": [12, 13, 14], "conv_emformer_transducer_stateless2": [12, 37], "num": [12, 13, 14, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "layer": [12, 13, 14, 21, 25, 27, 37, 39, 40, 41], "chunk": [12, 14, 15, 40, 41], "length": [12, 14, 21, 40, 41], "cnn": [12, 14], "kernel": [12, 14, 21], "left": [12, 14, 21, 40, 41], "context": [12, 21, 27, 37, 39, 40, 41], "right": [12, 21, 37, 40], "memori": [12, 19, 21, 24, 37], "dim": [12, 13, 14, 21, 27, 40], "configur": [12, 14, 21, 25, 28, 32, 33, 35], "accordingli": [12, 13, 14], "yourself": [12, 13, 14, 25, 40, 41], "tune": [12, 13, 14, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "combin": [12, 13, 14], "2023": [12, 13, 14, 29], "677": 12, "681": 12, "229": [12, 19], "best_v": 12, "alid_epoch": 12, "subsampl": [12, 40, 41], "ing_factor": 12, "a34171ed85605b0926eebbd0463d059431f4f74a": 12, "wed": [12, 19, 21, 24], "dec": 12, "ver": 12, "ion": 12, "530e8a1": 12, "tue": [12, 24], "star": [12, 13, 14], "op": 12, "1220120619": [12, 13, 14], "7695ff496b": [12, 13, 14], "s9n4w": [12, 13, 14], "127": [12, 13, 35], "icefa": 12, "ll": 12, "transdu": 12, "cer": 12, "use_averaged_model": [12, 13, 14], "cnn_module_kernel": [12, 14], "left_context_length": 12, "chunk_length": 12, "right_context_length": 12, "memory_s": 12, "231": [12, 13, 14], "053": 12, "022": 12, "708": [12, 19, 21, 24, 35], "315": [12, 19, 21, 22, 24, 28], "75490012": 12, "318": [12, 13], "320": [12, 21], "682": 12, "75": 12, "lh": [12, 13, 14], "rw": [12, 13, 14], "kuangfangjun": [12, 13, 14], "289m": 12, "jan": [12, 13, 14], "289": 12, "roughli": [12, 13, 14], "equal": [12, 13, 14, 40, 41], "1024": [12, 13, 14, 39], "287": [12, 35], "1010k": [12, 13], "decoder_jit_trac": [12, 13, 14, 17, 39, 41], "283m": 12, "encoder_jit_trac": [12, 13, 14, 17, 39, 41], "0m": [12, 13], "joiner_jit_trac": [12, 13, 14, 17, 39, 41], "sure": [12, 13, 14], "found": [12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 35, 39, 40], "param": [12, 13, 14], "503k": [12, 13], "437": [12, 13, 14], "142m": 12, "79k": 12, "5m": [12, 13], "488": [12, 13, 14], "text": [12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "architectur": [12, 13, 14, 39], "editor": [12, 13, 14], "content": [12, 13, 14], "compar": [12, 13, 14, 40], "283": [12, 14], "1010": [12, 13], "142": [12, 19, 22, 24], "503": [12, 13], "convers": [12, 13, 14], "half": [12, 13, 14, 27, 40, 41], "joiner": [12, 13, 14, 15, 17, 21, 27, 39, 40, 41], "default": [12, 13, 14, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "float32": [12, 13, 14], "float16": [12, 13, 14], "occupi": [12, 13, 14], "byte": [12, 13, 14], "twice": [12, 13, 14], "smaller": [12, 13, 14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "fp16": [12, 13, 14, 27, 29, 30, 39, 40, 41], "won": [12, 13, 14, 15, 19, 22, 24, 25, 27, 29, 30, 39, 40, 41], "token": [12, 13, 14, 15, 19, 21, 22, 24, 28, 32, 33, 35], "accept": [12, 13, 14], "216": [12, 19, 24, 32, 33], "encoder_param_filenam": [12, 13, 14], "encoder_bin_filenam": [12, 13, 14], "decoder_param_filenam": [12, 13, 14], "decoder_bin_filenam": [12, 13, 14], "joiner_param_filenam": [12, 13, 14], "joiner_bin_filenam": [12, 13, 14], "sound_filenam": [12, 13, 14], "141": 12, "328": 12, "151": 12, "331": [12, 13, 24, 28], "176": [12, 21, 24], "336": 12, "106000": [12, 13, 14, 24, 28], "581": [12, 28], "381": 12, "few": [12, 13, 14, 25], "7767517": [12, 13, 14], "1060": 12, "1342": 12, "in0": [12, 13, 14], "explan": [12, 13, 14], "three": [12, 13, 14, 17, 19, 21, 37], "magic": [12, 13, 14], "intermedi": [12, 13, 14], "mean": [12, 13, 14, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 37, 39, 40, 41], "extra": [12, 13, 14, 21, 37, 40], "increment": [12, 13, 14], "1061": 12, "sherpametadata": [12, 13, 14], "sherpa_meta_data1": [12, 13, 14], "still": [12, 13, 14], "sinc": [12, 13, 14, 25, 35, 39], "newli": [12, 13, 14], "must": [12, 13, 14, 40], "kei": [12, 13, 14, 24], "eas": [12, 13, 14], "list": [12, 13, 14, 19, 21, 22, 24, 28, 32, 33], "pair": [12, 13, 14], "sad": [12, 13, 14], "rememb": [12, 13, 14], "anymor": [12, 13, 14], "flexibl": [12, 13, 14], "edit": [12, 13, 14], "arm": [12, 13, 14], "aarch64": [12, 13, 14], "onc": [12, 13], "mayb": [12, 13], "year": [12, 13], "_jit_trac": [12, 13], "56": [12, 13, 24, 32], "fp32": [12, 13], "doubl": [12, 13], "j": [12, 13, 19, 24], "scale": [12, 13, 19, 24, 25, 28, 30, 32, 33], "py38": [12, 13, 14], "arg": [12, 13], "wave_filenam": [12, 13], "16k": [12, 13], "hz": [12, 13, 32, 33], "mono": [12, 13], "calibr": [12, 13], "purpos": [12, 13], "cat": [12, 13], "eof": [12, 13], "calcul": [12, 13, 29, 40, 41], "has_gpu": [12, 13], "config": [12, 13], "use_vulkan_comput": [12, 13], "88": [12, 21], "conv_87": 12, "942385": [12, 13], "threshold": [12, 13, 29], "938493": 12, "968131": 12, "conv_88": 12, "442448": 12, "549335": 12, "167552": 12, "conv_89": 12, "228289": 12, "001738": 12, "871552": 12, "linear_90": 12, "976146": 12, "101789": 12, "115": [12, 13, 19, 24], "267128": 12, "linear_91": 12, "962030": 12, "162033": 12, "602713": 12, "linear_92": 12, "323041": 12, "853959": 12, "953129": 12, "linear_94": 12, "905416": 12, "648006": 12, "323545": 12, "linear_93": 12, "474093": 12, "200188": 12, "linear_95": 12, "888012": 12, "403563": 12, "483986": 12, "linear_96": 12, "856741": 12, "398679": 12, "524273": 12, "linear_97": 12, "635942": 12, "613655": 12, "590950": 12, "linear_98": 12, "460340": 12, "670146": 12, "398010": 12, "linear_99": 12, "532276": 12, "585537": 12, "119396": 12, "linear_101": 12, "585871": 12, "719224": 12, "205809": 12, "linear_100": 12, "751382": 12, "081648": 12, "linear_102": 12, "593344": 12, "450581": 12, "551147": 12, "linear_103": 12, "592681": 12, "705824": 12, "257959": 12, "linear_104": 12, "752957": 12, "980955": 12, "110489": 12, "linear_105": 12, "696240": 12, "877193": 12, "608953": 12, "linear_106": 12, "059659": 12, "643138": 12, "048950": 12, "linear_108": 12, "975461": 12, "589567": 12, "671457": 12, "linear_107": 12, "190381": 12, "515701": 12, "linear_109": 12, "710759": 12, "305635": 12, "082436": 12, "linear_110": 12, "531228": 12, "731162": 12, "159557": 12, "linear_111": 12, "528083": 12, "259322": 12, "211544": 12, "linear_112": 12, "148807": 12, "500842": 12, "087374": 12, "linear_113": 12, "592566": 12, "948851": 12, "65": 12, "166611": 12, "linear_115": 12, "437109": 12, "608947": 12, "642395": 12, "linear_114": 12, "193942": 12, "503904": 12, "linear_116": 12, "966980": 12, "200896": 12, "676392": 12, "linear_117": 12, "451303": 12, "061664": 12, "951344": 12, "linear_118": 12, "077262": 12, "965800": 12, "023804": 12, "linear_119": 12, "671615": 12, "847613": 12, "198460": 12, "linear_120": 12, "625638": 12, "131427": 12, "556595": 12, "linear_122": 12, "274080": 12, "888716": 12, "978189": 12, "linear_121": 12, "420480": 12, "429659": 12, "linear_123": 12, "826197": 12, "599617": 12, "281532": 12, "linear_124": 12, "396383": 12, "325849": 12, "335875": 12, "linear_125": 12, "337198": 12, "941410": 12, "221970": 12, "linear_126": 12, "699965": 12, "842878": 12, "224073": 12, "linear_127": 12, "775370": 12, "884215": 12, "696438": 12, "linear_129": 12, "872276": 12, "837319": 12, "254213": 12, "linear_128": 12, "180057": 12, "687883": 12, "linear_130": 12, "150427": 12, "454298": 12, "765789": 12, "linear_131": 12, "112692": 12, "924847": 12, "025545": 12, "linear_132": 12, "852893": 12, "116593": 12, "749626": 12, "linear_133": 12, "517084": 12, "024665": 12, "275314": 12, "linear_134": 12, "683807": 12, "878618": 12, "743618": 12, "linear_136": 12, "421055": 12, "322729": 12, "086264": 12, "linear_135": 12, "309880": 12, "917679": 12, "linear_137": 12, "827781": 12, "744595": 12, "915554": 12, "linear_138": 12, "422395": 12, "742882": 12, "402161": 12, "linear_139": 12, "527538": 12, "866123": 12, "849449": 12, "linear_140": 12, "128619": 12, "657793": 12, "266134": 12, "linear_141": 12, "839593": 12, "845993": 12, "021378": 12, "linear_143": 12, "442304": 12, "099039": 12, "889746": 12, "linear_142": 12, "325038": 12, "849592": 12, "linear_144": 12, "929444": 12, "618206": 12, "605080": 12, "linear_145": 12, "382126": 12, "321095": 12, "625010": 12, "linear_146": 12, "894987": 12, "867645": 12, "836517": 12, "linear_147": 12, "915313": 12, "906028": 12, "886522": 12, "linear_148": 12, "614287": 12, "908151": 12, "496181": 12, "linear_150": 12, "724932": 12, "485588": 12, "28": [12, 13, 21, 24, 28], "312899": 12, "linear_149": 12, "161146": 12, "606939": 12, "linear_151": 12, "164453": 12, "847355": 12, "719223": 12, "linear_152": 12, "086471": 12, "984121": 12, "222834": 12, "linear_153": 12, "099524": 12, "991601": 12, "816805": 12, "linear_154": 12, "054585": 12, "489706": 12, "286930": 12, "linear_155": 12, "389185": 12, "100321": 12, "963501": 12, "linear_157": 12, "982999": 12, "154796": 12, "637253": 12, "linear_156": 12, "537706": 12, "875190": 12, "linear_158": 12, "420287": 12, "502287": 12, "531588": 12, "linear_159": 12, "014746": 12, "423280": 12, "477261": 12, "linear_160": 12, "45": [12, 14, 19, 21, 24], "633553": 12, "715335": 12, "220921": 12, "linear_161": 12, "371849": 12, "117830": 12, "815203": 12, "linear_162": 12, "492933": 12, "126283": 12, "623318": 12, "linear_164": 12, "697504": 12, "825712": 12, "317358": 12, "linear_163": 12, "078367": 12, "008038": 12, "linear_165": 12, "023975": 12, "836278": 12, "577358": 12, "linear_166": 12, "860619": 12, "259792": 12, "493614": 12, "linear_167": 12, "380934": 12, "496160": 12, "107042": 12, "linear_168": 12, "691216": 12, "733317": 12, "831076": 12, "linear_169": 12, "723948": 12, "952728": 12, "129707": 12, "linear_171": 12, "034811": 12, "366547": 12, "665123": 12, "linear_170": 12, "356277": 12, "710501": 12, "linear_172": 12, "556884": 12, "729481": 12, "166058": 12, "linear_173": 12, "033039": 12, "207264": 12, "442120": 12, "linear_174": 12, "597379": 12, "658676": 12, "47": [12, 13, 14, 19, 24], "768131": 12, "linear_2": [12, 13], "293503": 12, "305265": 12, "877850": 12, "linear_1": [12, 13], "812222": 12, "766452": 12, "487047": 12, "linear_3": [12, 13], "999999": 12, "999755": 12, "031174": 12, "wish": [12, 13], "low": [12, 13], "accuraci": [12, 13, 20], "955k": 12, "18k": 12, "inparam": [12, 13], "inbin": [12, 13], "outparam": [12, 13], "outbin": [12, 13], "99m": 12, "78k": 12, "774k": [12, 13], "496": [12, 13, 24, 28], "774": [12, 13], "much": [12, 13], "linear": [12, 13, 21], "convolut": [12, 13, 29, 37, 40], "exact": [12, 13], "4x": [12, 13], "speed": [12, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "comparison": 12, "44": [12, 13, 24, 32, 33], "468000": [13, 17, 39], "lstm_transducer_stateless2": [13, 17, 39], "rnn": [13, 21, 27, 29, 39, 40, 41], "hidden": [13, 39], "862": 13, "222": [13, 22, 24], "865": 13, "is_pnnx": 13, "62e404dd3f3a811d73e424199b3408e309c06e1a": [13, 14], "mon": [13, 14], "6d7a559": [13, 14], "feb": [13, 14, 21], "54": [13, 14, 24, 28, 32, 33], "147": [13, 14], "rnn_hidden_s": 13, "aux_layer_period": 13, "235": 13, "43": [13, 14, 24], "472": 13, "595": 13, "324": 13, "83137520": 13, "596": 13, "257024": 13, "326": 13, "781812": 13, "327": 13, "84176356": 13, "182": [13, 14, 19, 28], "158": 13, "183": [13, 32, 33], "335": 13, "101": 13, "tracerwarn": [13, 14], "boolean": [13, 14], "might": [13, 14, 40, 41], "caus": [13, 14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "flow": [13, 14], "treat": [13, 14], "constant": [13, 14], "need_pad": 13, "bool": 13, "180": [13, 19, 24], "339": 13, "304": 13, "207": [13, 22, 24], "84": [13, 19], "324m": 13, "321": [13, 19], "318m": 13, "159m": 13, "21k": 13, "159": [13, 24, 35], "37": [13, 19, 21, 24, 32], "861": 13, "255": [13, 14], "425": [13, 24], "427": [13, 24], "266": [13, 14, 24, 28], "431": 13, "342": 13, "343": 13, "267": [13, 21, 32, 33], "379": 13, "268": [13, 24, 28], "317m": 13, "conv_15": 13, "930708": 13, "972025": 13, "conv_16": 13, "978855": 13, "031788": 13, "456645": 13, "conv_17": 13, "868437": 13, "830528": 13, "218575": 13, "linear_18": 13, "107259": 13, "194808": 13, "106": [13, 24], "293236": 13, "linear_19": 13, "193777": 13, "634748": 13, "401705": 13, "linear_20": 13, "259933": 13, "606617": 13, "722160": 13, "linear_21": 13, "186600": 13, "790260": 13, "512129": 13, "linear_22": 13, "759041": 13, "265832": 13, "050053": 13, "linear_23": 13, "931209": 13, "099090": 13, "979767": 13, "linear_24": 13, "324160": 13, "215561": 13, "321835": 13, "linear_25": 13, "800708": 13, "599352": 13, "284134": 13, "linear_26": 13, "492444": 13, "153369": 13, "274391": 13, "linear_27": 13, "660161": 13, "720994": 13, "46": [13, 19, 24], "674126": 13, "linear_28": 13, "415265": 13, "174434": 13, "007133": 13, "linear_29": 13, "038418": 13, "118534": 13, "724262": 13, "linear_30": 13, "072084": 13, "936867": 13, "259155": 13, "linear_31": 13, "342712": 13, "599489": 13, "282787": 13, "linear_32": 13, "340535": 13, "120308": 13, "701103": 13, "linear_33": 13, "846987": 13, "630030": 13, "985939": 13, "linear_34": 13, "686298": 13, "204571": 13, "607586": 13, "linear_35": 13, "904821": 13, "575518": 13, "756420": 13, "linear_36": 13, "806659": 13, "585589": 13, "118401": 13, "linear_37": 13, "402340": 13, "047157": 13, "162680": 13, "linear_38": 13, "174589": 13, "923361": 13, "030258": 13, "linear_39": 13, "178576": 13, "556058": 13, "807705": 13, "linear_40": 13, "901954": 13, "301267": 13, "956539": 13, "linear_41": 13, "839805": 13, "597429": 13, "716181": 13, "linear_42": 13, "178945": 13, "651595": 13, "895699": 13, "829245": 13, "627592": 13, "637907": 13, "746186": 13, "255032": 13, "167313": 13, "000000": 13, "999756": 13, "031013": 13, "345k": 13, "17k": 13, "218m": 13, "218": 13, "larger": [13, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "counterpart": 13, "bit": [13, 19, 21, 22, 24, 28, 35], "4532": 13, "stateless7": [14, 15], "pruned_transducer_stateless7_stream": [14, 15, 41], "len": [14, 15, 41], "feedforward": [14, 21, 27, 40], "384": [14, 24], "unmask": 14, "256": [14, 32, 33], "downsampl": [14, 20], "factor": [14, 19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "473": [14, 24], "477": 14, "warm_step": 14, "2000": [14, 22], "feedforward_dim": 14, "attention_dim": [14, 19, 21, 24], "encoder_unmasked_dim": 14, "zipformer_downsampling_factor": 14, "decode_chunk_len": 14, "257": [14, 21, 32, 33], "023": 14, "zipformer2": 14, "419": 14, "At": [14, 19, 24], "stack": 14, "downsampling_factor": 14, "037": 14, "655": 14, "346": 14, "68944004": 14, "347": 14, "260096": 14, "348": [14, 32], "716276": 14, "656": [14, 24], "349": 14, "69920376": 14, "351": 14, "353": 14, "174": [14, 24], "175": 14, "1344": 14, "assert": 14, "cached_len": 14, "num_lay": 14, "1348": 14, "cached_avg": 14, "1352": 14, "cached_kei": 14, "1356": 14, "cached_v": 14, "1360": 14, "cached_val2": 14, "1364": 14, "cached_conv1": 14, "1368": 14, "cached_conv2": 14, "1373": 14, "left_context_len": 14, "1884": 14, "x_size": 14, "2442": 14, "2449": 14, "2469": 14, "2473": 14, "2483": 14, "kv_len": 14, "2570": 14, "attn_output": 14, "bsz": 14, "num_head": 14, "seq_len": 14, "head_dim": 14, "2926": 14, "lorder": 14, "2652": 14, "2653": 14, "embed_dim": 14, "2666": 14, "1543": 14, "in_x_siz": 14, "1637": 14, "1643": 14, "in_channel": 14, "1571": 14, "1763": 14, "src1": 14, "src2": 14, "1779": 14, "dim1": 14, "1780": 14, "dim2": 14, "_trace": 14, "958": 14, "tracer": 14, "instead": [14, 21, 40], "tupl": 14, "namedtupl": 14, "absolut": 14, "know": [14, 25], "side": 14, "effect": 14, "strict": [14, 20], "allow": [14, 27, 40], "_c": 14, "_create_method_from_trac": 14, "640": 14, "646": 14, "embedding_out": 14, "686": 14, "204": [14, 24], "361": [14, 24, 28], "735": 14, "69": 14, "269m": 14, "53": [14, 19, 27, 28, 33, 39, 40], "269": [14, 19, 32, 33], "725": [14, 28], "1022k": 14, "266m": 14, "8m": 14, "509k": 14, "133m": 14, "152k": 14, "4m": 14, "1022": 14, "133": 14, "509": 14, "260": [14, 24], "264": 14, "360": 14, "365": 14, "280": [14, 24], "372": [14, 19], "state": [14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "026": 14, "410": 14, "2028": 14, "2547": 14, "2029": 14, "23316": 14, "23317": 14, "23318": 14, "23319": 14, "23320": 14, "amount": [14, 20], "pad": [14, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "conv2dsubsampl": 14, "arrai": 14, "23300": 14, "element": 14, "onnx_pretrain": 15, "onnxruntim": 15, "separ": 15, "deploi": [15, 19, 24], "repo_url": 15, "basenam": 15, "pushd": 15, "popd": 15, "tree": [16, 17, 19, 21, 22, 24, 28, 32, 33, 35, 39], "cpu_jit": [16, 19, 24, 27, 29, 30, 40, 41], "confus": 16, "move": [16, 27, 29, 30, 40, 41], "why": 16, "streaming_asr": [16, 17, 39, 40, 41], "conv_emform": 16, "offline_asr": [16, 27], "jit_pretrain": [17, 29, 30, 39], "baz": 17, "tutori": [19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 39, 40, 41], "learn": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "singl": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "1best": [19, 22, 24, 28, 29, 30, 32, 33], "automag": [19, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "stop": [19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "control": [19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "By": [19, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "musan": [19, 22, 24, 25, 27, 29, 30, 39, 40, 41], "thei": [19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "intal": [19, 22], "initi": [19, 22], "sudo": [19, 22], "apt": [19, 22], "permiss": [19, 22], "commandlin": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "quit": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "often": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "experi": [19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40, 41], "world": [19, 21, 22, 24, 25, 27, 28, 29, 30, 39, 40, 41], "multi": [19, 21, 22, 24, 25, 27, 29, 30, 37, 39, 40, 41], "machin": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "ddp": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "implement": [19, 21, 22, 24, 25, 27, 29, 30, 37, 39, 40, 41], "present": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "second": [19, 21, 22, 24, 25, 27, 29, 30, 35, 39, 40, 41], "over": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "utter": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "oom": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "v100": [19, 21, 22, 24], "nvidia": [19, 21, 22, 24], "due": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "usual": [19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "increas": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "weight": [19, 22, 24, 29, 30, 39], "decai": [19, 22, 24, 29, 30, 39], "warmup": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "get_param": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "realli": [19, 22, 24, 27, 29, 30, 39, 40, 41], "directli": [19, 21, 22, 24, 25, 27, 29, 30, 39, 40, 41], "perturb": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "3x150": [19, 21, 22], "hour": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "These": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "rate": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "visual": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "logdir": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "labelsmooth": 19, "someth": [19, 21, 22, 24, 27, 29, 30, 35, 39, 40], "tensorflow": [19, 21, 22, 24, 27, 29, 30, 35, 39, 40], "press": [19, 21, 22, 24, 27, 29, 30, 35, 39, 40, 41], "ctrl": [19, 21, 22, 24, 27, 29, 30, 35, 39, 40, 41], "engw8ksktzqs24zbv5dgcg": 19, "22t11": 19, "scan": [19, 21, 22, 24, 27, 35, 39, 40], "116068": 19, "scalar": [19, 21, 22, 24, 27, 35, 39, 40], "listen": [19, 21, 22, 27, 35, 39, 40], "url": [19, 21, 22, 24, 27, 29, 30, 35, 39, 40], "xxxx": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "saw": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "consol": [19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "typic": [19, 21, 22, 24], "avoid": [19, 21, 24], "commonli": [19, 21, 22, 24, 28, 32, 33, 35], "nbest": [19, 24, 30], "lattic": [19, 22, 24, 27, 28, 32, 33, 40, 41], "score": [19, 24, 27, 40, 41], "uniqu": [19, 24, 27, 40, 41], "pkufool": [19, 22, 28], "icefall_asr_aishell_conformer_ctc": 19, "transcrib": [19, 21, 22, 24], "v1": [19, 22, 24, 28, 32, 33], "lang_char": [19, 21], "word": [19, 21, 22, 24, 28, 32, 33, 35], "bac009s0764w0121": [19, 21, 22], "bac009s0764w0122": [19, 21, 22], "bac009s0764w0123": [19, 21, 22], "tran": [19, 22, 24, 28, 32, 33], "graph": [19, 22, 24, 27, 28, 32, 33, 40, 41], "id": [19, 22, 24, 28, 32, 33], "conveni": [19, 22, 24, 25], "eo": [19, 22, 24], "easili": [19, 22, 24], "obtain": [19, 21, 22, 24, 28, 32, 33], "soxi": [19, 21, 22, 24, 28, 35], "sampl": [19, 21, 22, 24, 28, 29, 35, 40, 41], "precis": [19, 21, 22, 24, 27, 28, 35, 40, 41], "67263": [19, 21, 22], "cdda": [19, 21, 22, 24, 28, 35], "sector": [19, 21, 22, 24, 28, 35], "135k": [19, 21, 22], "256k": [19, 21, 22, 24], "sign": [19, 21, 22, 24, 35], "integ": [19, 21, 22, 24, 35], "pcm": [19, 21, 22, 24, 35], "65840": [19, 21, 22], "625": [19, 21, 22], "132k": [19, 21, 22], "64000": [19, 21, 22], "300": [19, 21, 22, 24, 25, 27, 40], "128k": [19, 21, 22, 35], "displai": [19, 21, 22, 24], "topologi": [19, 24], "707": [19, 24], "num_decoder_lay": [19, 24], "vgg_frontend": [19, 21, 24], "use_feat_batchnorm": [19, 24], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 19, "sun": 19, "sep": 19, "33cfe45": 19, "d57a873": 19, "nov": [19, 24], "hw": 19, "kangwei": 19, "icefall_aishell3": 19, "k2_releas": 19, "tokens_fil": 19, "words_fil": [19, 24, 35], "num_path": [19, 24, 27, 40, 41], "ngram_lm_scal": [19, 24], "attention_decoder_scal": [19, 24], "nbest_scal": [19, 24], "sos_id": [19, 24], "eos_id": [19, 24], "num_class": [19, 24, 35], "4336": [19, 21], "242": [19, 24], "131": [19, 24], "134": 19, "275": 19, "241": 19, "293": [19, 24], "704": [19, 32], "369": [19, 24], "\u751a": [19, 21], "\u81f3": [19, 21], "\u51fa": [19, 21], "\u73b0": [19, 21], "\u4ea4": [19, 21], "\u6613": [19, 21], "\u51e0": [19, 21], "\u4e4e": [19, 21], "\u505c": [19, 21], "\u6b62": 19, "\u7684": [19, 21, 22], "\u60c5": [19, 21], "\u51b5": [19, 21], "\u4e00": [19, 21], "\u4e8c": [19, 21], "\u7ebf": [19, 21, 22], "\u57ce": [19, 21], "\u5e02": [19, 21], "\u867d": [19, 21], "\u7136": [19, 21], "\u4e5f": [19, 21, 22], "\u5904": [19, 21], "\u4e8e": [19, 21], "\u8c03": [19, 21], "\u6574": [19, 21], "\u4e2d": [19, 21, 22], "\u4f46": [19, 21, 22], "\u56e0": [19, 21], "\u4e3a": [19, 21], "\u805a": [19, 21], "\u96c6": [19, 21], "\u4e86": [19, 21, 22], "\u8fc7": [19, 21], "\u591a": [19, 21], "\u516c": [19, 21], "\u5171": [19, 21], "\u8d44": [19, 21], "\u6e90": [19, 21], "371": 19, "683": 19, "651": [19, 35], "654": 19, "659": 19, "752": 19, "887": 19, "340": 19, "370": 19, "\u751a\u81f3": [19, 22], "\u51fa\u73b0": [19, 22], "\u4ea4\u6613": [19, 22], "\u51e0\u4e4e": [19, 22], "\u505c\u6b62": 19, "\u60c5\u51b5": [19, 22], "\u4e00\u4e8c": [19, 22], "\u57ce\u5e02": [19, 22], "\u867d\u7136": [19, 22], "\u5904\u4e8e": [19, 22], "\u8c03\u6574": [19, 22], "\u56e0\u4e3a": [19, 22], "\u805a\u96c6": [19, 22], "\u8fc7\u591a": [19, 22], "\u516c\u5171": [19, 22], "\u8d44\u6e90": [19, 22], "recor": [19, 24], "highest": [19, 24], "965": 19, "966": 19, "821": 19, "822": 19, "826": 19, "916": 19, "345": 19, "888": 19, "889": 19, "limit": [19, 21, 24, 37, 40], "upgrad": [19, 24], "pro": [19, 24], "finish": [19, 21, 22, 24, 25, 27, 28, 32, 33, 35, 40, 41], "checkout": [19, 24], "v2": [19, 24], "hlg_decod": [19, 24], "four": [19, 24], "messag": [19, 24, 27, 29, 30, 39, 40, 41], "nn_model": [19, 24], "use_gpu": [19, 24], "word_tabl": [19, 24], "caution": [19, 24], "forward": [19, 24, 29], "cu": [19, 24], "int": [19, 24], "char": [19, 24], "124": [19, 24], "98": 19, "150": [19, 24], "693": [19, 32], "165": [19, 24], "nnet_output": [19, 24], "489": 19, "mandarin": 20, "corpu": 20, "beij": 20, "shell": 20, "technologi": 20, "ltd": 20, "400": 20, "peopl": 20, "accent": 20, "area": 20, "china": 20, "invit": 20, "particip": 20, "conduct": 20, "quiet": 20, "indoor": 20, "high": 20, "fidel": 20, "microphon": 20, "16khz": 20, "manual": 20, "through": 20, "profession": 20, "annot": 20, "inspect": 20, "free": [20, 25, 39], "academ": 20, "moder": 20, "research": 20, "field": 20, "openslr": 20, "ctc": [20, 23, 26, 30, 31, 34], "stateless": [20, 23, 27, 39, 40, 41], "head": [21, 37], "embed": [21, 27, 39, 40, 41], "conv1d": [21, 27, 39, 40, 41], "nn": [21, 27, 29, 30, 39, 40, 41], "tanh": 21, "borrow": 21, "ieeexplor": 21, "ieee": 21, "stamp": 21, "jsp": 21, "arnumb": 21, "9054419": 21, "predict": [21, 25, 27, 39, 40, 41], "charact": 21, "unit": 21, "vocabulari": 21, "87939824": 21, "optimized_transduc": 21, "technqiu": 21, "propos": [21, 37, 41], "improv": 21, "end": [21, 27, 29, 30, 35, 39, 40, 41], "furthermor": 21, "maximum": 21, "emit": 21, "per": [21, 27, 40, 41], "frame": [21, 27, 29, 40, 41], "simplifi": [21, 37], "significantli": 21, "degrad": 21, "exactli": 21, "benchmark": 21, "unprun": 21, "advantag": 21, "minim": 21, "pruned_transducer_stateless": [21, 27, 37, 40], "altern": 21, "though": 21, "transducer_stateless_modifi": 21, "pr": 21, "gb": 21, "ram": 21, "small": [21, 32, 33, 35], "tri": 21, "prob": [21, 39], "appli": [21, 37], "c": [21, 22, 27, 29, 30, 35, 39, 40, 41], "lagz6hrcqxoigbfd5e0y3q": 21, "03t14": 21, "8477": 21, "sym": [21, 27, 40, 41], "beam_search": [21, 27, 40, 41], "decoding_method": 21, "beam_4": 21, "ensur": 21, "give": 21, "poor": 21, "531": [21, 22], "994": [21, 24], "027": 21, "encoder_out_dim": 21, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 21, "50d2281": 21, "mar": 21, "0815224919": 21, "75d558775b": 21, "mmnv8": 21, "72": [21, 24], "878": [21, 33], "880": 21, "891": 21, "__floordiv__": 21, "x_len": 21, "163": [21, 24], "\u6ede": 21, "322": 21, "759": 21, "760": 21, "919": 21, "922": 21, "046": 21, "047": 21, "319": [21, 24], "214": [21, 24], "215": [21, 24, 28], "402": 21, "topk_hyp_index": 21, "topk_index": 21, "logit": 21, "583": [21, 33], "lji9mwuorlow3jkdhxwk8a": 22, "13t11": 22, "4454": 22, "icefall_asr_aishell_tdnn_lstm_ctc": 22, "858": [22, 24], "389": [22, 24], "154": 22, "161": [22, 24], "536": 22, "171": [22, 24, 32, 33], "539": 22, "917": 22, "129": 22, "\u505c\u6ede": 22, "statelessx": [23, 25, 26, 36, 37, 38], "mmi": [23, 26], "blank": [23, 26], "skip": [23, 25, 26, 27, 39, 40, 41], "distil": [23, 26], "hubert": [23, 26], "ligru": [23, 31], "full": [24, 25, 27, 29, 30, 39, 40, 41], "libri": [24, 25, 27, 29, 30, 39, 40, 41], "960": [24, 27, 29, 30, 39, 40, 41], "subset": [24, 27, 29, 30, 39, 40, 41], "3x960": [24, 27, 29, 30, 39, 40, 41], "2880": [24, 27, 29, 30, 39, 40, 41], "lzgnetjwrxc3yghnmd4kpw": 24, "24t16": 24, "4540": 24, "sentenc": 24, "piec": 24, "And": [24, 27, 29, 30, 39, 40, 41], "neither": 24, "nor": 24, "vocab": 24, "5000": 24, "033": 24, "538": 24, "full_libri": [24, 25], "406": 24, "464": 24, "548": 24, "776": 24, "652": [24, 35], "109226120": 24, "714": [24, 32], "944": 24, "1328": 24, "443": [24, 28], "2563": 24, "494": 24, "592": 24, "1715": 24, "52576": 24, "1424": 24, "807": 24, "506": 24, "808": [24, 32], "522": 24, "362": 24, "565": 24, "1477": 24, "2922": 24, "208": 24, "4295": 24, "52343": 24, "396": 24, "3584": 24, "433": 24, "680": [24, 32], "_pickl": 24, "unpicklingerror": 24, "hlg_modifi": 24, "g_4_gram": [24, 28, 32, 33], "875": [24, 28], "212k": 24, "267440": [24, 28], "1253": [24, 28], "535k": 24, "83": [24, 28], "77200": [24, 28], "154k": 24, "554": 24, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 24, "8d93169": 24, "601": 24, "758": 24, "025": 24, "broffel": 24, "osom": 24, "723": 24, "775": 24, "881": 24, "352": 24, "234": 24, "whole": [24, 28, 32, 33, 40, 41], "ngram": [24, 28, 32, 33], "857": 24, "979": 24, "980": 24, "055": 24, "117": 24, "051": 24, "363": 24, "959": [24, 33], "546": 24, "598": 24, "599": [24, 28], "833": 24, "834": 24, "915": 24, "076": 24, "110": 24, "397": 24, "999": [24, 27, 40, 41], "concaten": 24, "bucket": 24, "sampler": 24, "1000": 24, "ctc_decod": 24, "ngram_lm_rescor": 24, "attention_rescor": 24, "kind": [24, 27, 29, 30, 39, 40, 41], "316": 24, "118": 24, "221": 24, "125": [24, 35], "136": 24, "144": 24, "543": 24, "topo": 24, "547": 24, "729": 24, "111": 24, "702": 24, "703": 24, "545": 24, "122": 24, "135": [24, 35], "153": [24, 35], "945": 24, "475": 24, "191": [24, 32, 33], "398": 24, "199": [24, 28], "515": 24, "205": 24, "w": [24, 32, 33], "deseri": 24, "441": 24, "fsaclass": 24, "loadfsa": 24, "const": 24, "string": 24, "c10": 24, "ignor": 24, "dummi": 24, "589": 24, "attention_scal": 24, "162": 24, "169": [24, 32, 33], "188": 24, "624": 24, "519": [24, 33], "632": 24, "645": [24, 35], "243": 24, "970": 24, "303": 24, "179": 24, "knowledg": 25, "_": 25, "vector": 25, "mvq": 25, "kd": 25, "paper": [25, 27, 39, 40, 41], "pruned_transducer_stateless4": [25, 27, 37, 40], "theoret": 25, "applic": 25, "minor": 25, "out": 25, "necessari": 25, "thing": 25, "distillation_with_hubert": 25, "Of": 25, "cours": 25, "xl": 25, "proce": 25, "960h": [25, 29], "use_extracted_codebook": 25, "augment": 25, "th": [25, 32, 33], "fine": 25, "embedding_lay": 25, "num_codebook": 25, "under": 25, "vq_fbank_layer36_cb8": 25, "whola": 25, "snippet": 25, "echo": 25, "awk": 25, "split": 25, "pruned_transducer_stateless6": 25, "12359": 25, "spec": 25, "aug": 25, "warp": 25, "enabl": 25, "argument": [25, 37], "paid": 25, "similar": [25, 29, 40, 41], "suitabl": [27, 39, 40, 41], "pruned_transducer_stateless2": [27, 37, 40], "pruned_transducer_stateless5": [27, 37, 40], "scroll": [27, 29, 30, 39, 40, 41], "scratch": [27, 29, 30, 39, 40, 41], "arxiv": [27, 39, 40, 41], "ab": [27, 39, 40, 41], "2206": [27, 39, 40, 41], "13236": [27, 39, 40, 41], "rework": [27, 37, 40], "daniel": [27, 40, 41], "joint": [27, 39, 40, 41], "contrari": [27, 39, 40, 41], "convent": [27, 39, 40, 41], "recurr": [27, 39, 40, 41], "2x": [27, 40, 41], "dimens": [27, 40, 41], "littl": [27, 40], "436000": [27, 29, 30, 39, 40, 41], "438000": [27, 29, 30, 39, 40, 41], "qogspbgsr8kzcrmmie9jgw": 27, "20t15": [27, 39, 40], "4468": [27, 39, 40], "210171": [27, 39, 40], "access": [27, 29, 30, 39, 40, 41], "6008": [27, 29, 30, 39, 40, 41], "localhost": [27, 29, 30, 39, 40, 41], "expos": [27, 29, 30, 39, 40, 41], "proxi": [27, 29, 30, 39, 40, 41], "bind_al": [27, 29, 30, 39, 40, 41], "both": [27, 29, 30, 37, 39, 40, 41], "lowest": [27, 29, 30, 39, 40, 41], "fast_beam_search": [27, 29, 39, 40, 41], "474000": [27, 39, 40, 41], "largest": [27, 40, 41], "posterior": [27, 29, 40, 41], "algorithm": [27, 40, 41], "pdf": [27, 30, 40, 41], "1211": [27, 40, 41], "3711": [27, 40, 41], "espnet": [27, 40, 41], "net": [27, 40, 41], "beam_search_transduc": [27, 40, 41], "basicli": [27, 40, 41], "topk": [27, 40, 41], "expand": [27, 40, 41], "mode": [27, 40, 41], "being": [27, 40, 41], "hardcod": [27, 40, 41], "composit": [27, 40, 41], "between": [27, 40, 41], "log_prob": [27, 40, 41], "hard": [27, 37, 40, 41], "2211": [27, 40, 41], "00484": [27, 40, 41], "rnnt": [27, 40, 41], "effici": [27, 40, 41], "fast_beam_search_lg": [27, 40, 41], "trivial": [27, 40, 41], "fast_beam_search_nbest": [27, 40, 41], "random_path": [27, 40, 41], "shortest": [27, 40, 41], "fast_beam_search_nbest_lg": [27, 40, 41], "logic": [27, 40, 41], "smallest": [27, 39, 40, 41], "icefall_asr_librispeech_tdnn": 28, "lstm_ctc": 28, "flac": 28, "116k": 28, "140k": 28, "343k": 28, "164k": 28, "105k": 28, "174k": 28, "pretraind": 28, "168": 28, "170": 28, "584": [28, 33], "209": 28, "791": 28, "245": 28, "099": 28, "methond": [28, 32, 33], "403": 28, "631": 28, "010": 28, "guidanc": 29, "bigger": 29, "simpli": 29, "discard": 29, "prevent": 29, "lconv": 29, "encourag": [29, 30, 39], "stabil": [29, 30], "doesn": 29, "warm": [29, 30], "xyozukpeqm62hbilud4upa": [29, 30], "ctc_guide_decode_b": 29, "pretrained_ctc": 29, "jit_pretrained_ctc": 29, "100h": 29, "yfyeung": 29, "wechat": 30, "zipformer_mmi": 30, "worker": [30, 39], "hp": 30, "tdnn_ligru_ctc": 32, "enough": [32, 33, 35], "luomingshuang": [32, 33], "icefall_asr_timit_tdnn_ligru_ctc": 32, "pretrained_average_9_25": 32, "fdhc0_si1559": [32, 33], "felc0_si756": [32, 33], "fmgd0_si1564": [32, 33], "ffprobe": [32, 33], "show_format": [32, 33], "nistspher": [32, 33], "database_id": [32, 33], "database_vers": [32, 33], "utterance_id": [32, 33], "dhc0_si1559": [32, 33], "sample_min": [32, 33], "4176": [32, 33], "sample_max": [32, 33], "5984": [32, 33], "bitrat": [32, 33], "258": [32, 33], "audio": [32, 33], "pcm_s16le": [32, 33], "s16": [32, 33], "elc0_si756": [32, 33], "1546": [32, 33], "1989": [32, 33], "mgd0_si1564": [32, 33], "7626": [32, 33], "10573": [32, 33], "660": 32, "695": 32, "697": 32, "210": [32, 33], "829": 32, "sil": [32, 33], "dh": [32, 33], "ih": [32, 33], "uw": [32, 33], "ah": [32, 33], "ii": [32, 33], "z": [32, 33], "aa": [32, 33], "ei": [32, 33], "dx": [32, 33], "uh": [32, 33], "ng": [32, 33], "eh": [32, 33], "jh": [32, 33], "er": [32, 33], "ai": [32, 33], "hh": [32, 33], "aw": 32, "ae": [32, 33], "705": 32, "715": 32, "720": 32, "251": [32, 33], "ch": 32, "icefall_asr_timit_tdnn_lstm_ctc": 33, "pretrained_average_16_25": 33, "816": 33, "827": 33, "387": 33, "unk": 33, "739": 33, "971": 33, "977": 33, "978": 33, "981": 33, "ow": 33, "ykubhb5wrmosxykid1z9eg": 35, "23t23": 35, "icefall_asr_yesno_tdnn": 35, "l_disambig": 35, "lexicon_disambig": 35, "arpa": 35, "0_0_0_1_0_0_0_1": 35, "0_0_1_0_0_0_1_0": 35, "0_0_1_0_0_1_1_1": 35, "0_0_1_0_1_0_0_1": 35, "0_0_1_1_0_0_0_1": 35, "0_0_1_1_0_1_1_0": 35, "0_0_1_1_1_0_0_0": 35, "0_0_1_1_1_1_0_0": 35, "0_1_0_0_0_1_0_0": 35, "0_1_0_0_1_0_1_0": 35, "0_1_0_1_0_0_0_0": 35, "0_1_0_1_1_1_0_0": 35, "0_1_1_0_0_1_1_1": 35, "0_1_1_1_0_0_1_0": 35, "0_1_1_1_1_0_1_0": 35, "1_0_0_0_0_0_0_0": 35, "1_0_0_0_0_0_1_1": 35, "1_0_0_1_0_1_1_1": 35, "1_0_1_1_0_1_1_1": 35, "1_0_1_1_1_1_0_1": 35, "1_1_0_0_0_1_1_1": 35, "1_1_0_0_1_0_1_1": 35, "1_1_0_1_0_1_0_0": 35, "1_1_0_1_1_0_0_1": 35, "1_1_0_1_1_1_1_0": 35, "1_1_1_0_0_1_0_1": 35, "1_1_1_0_1_0_1_0": 35, "1_1_1_1_0_0_1_0": 35, "1_1_1_1_1_0_0_0": 35, "1_1_1_1_1_1_1_1": 35, "54080": 35, "507": 35, "108k": 35, "ye": 35, "hebrew": 35, "NO": 35, "621": 35, "119": 35, "650": 35, "139": 35, "143": 35, "198": 35, "181": 35, "186": 35, "187": 35, "correctli": 35, "simplest": 35, "former": 37, "idea": 37, "achiev": 37, "mask": [37, 40, 41], "wenet": 37, "did": 37, "metion": 37, "complic": 37, "techniqu": 37, "bank": 37, "memor": 37, "histori": 37, "introduc": 37, "variant": 37, "pruned_stateless_emformer_rnnt2": 37, "conv_emformer_transducer_stateless": 37, "ourself": 37, "mechan": 37, "onlin": 39, "lstm_transducer_stateless": 39, "lower": 39, "prepare_giga_speech": 39, "cj2vtpiwqhkn9q1tx6ptpg": 39, "dynam": [40, 41], "causal": 40, "short": [40, 41], "2012": 40, "05481": 40, "flag": 40, "indic": [40, 41], "whether": 40, "sequenc": [40, 41], "uniformli": [40, 41], "seen": [40, 41], "97vkxf80ru61cnp2alwzzg": 40, "streaming_decod": [40, 41], "acoust": [40, 41], "wise": [40, 41], "parallel": [40, 41], "bath": [40, 41], "parallelli": [40, 41], "seem": 40, "benefit": 40, "mismatch": 40, "mdoel": 40, "320m": 41, "550": 41, "scriptmodul": 41, "jit_trace_export": 41, "jit_trace_pretrain": 41, "task": 42}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 10, 16, 17], "creat": [2, 9], "recip": [2, 42], "data": [2, 9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "prepar": [2, 9, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "train": [2, 6, 9, 12, 13, 14, 15, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "decod": [2, 9, 10, 15, 19, 21, 22, 24, 25, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "pre": [2, 6, 12, 13, 14, 15, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "model": [2, 6, 10, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "frequent": 4, "ask": 4, "question": 4, "faq": 4, "oserror": 4, "libtorch_hip": 4, "so": 4, "cannot": 4, "open": 4, "share": 4, "object": 4, "file": [4, 15], "directori": 4, "attributeerror": 4, "modul": 4, "distutil": 4, "ha": 4, "attribut": 4, "version": 4, "importerror": 4, "libpython3": 4, "10": 4, "1": [4, 9, 12, 13, 14, 19, 21, 22, 24], "0": [4, 9], "No": 4, "huggingfac": [5, 7], "space": 7, "youtub": [7, 9], "video": [7, 9], "icefal": [8, 9, 12, 13, 14], "content": [8, 42], "instal": [9, 12, 13, 14, 19, 21, 22, 24, 28, 32, 33], "cuda": 9, "toolkit": 9, "cudnn": 9, "pytorch": 9, "torchaudio": 9, "2": [9, 12, 13, 14, 19, 21, 22, 24], "k2": 9, "3": [9, 12, 13, 14, 19, 21, 24], "lhots": 9, "4": [9, 12, 13, 14], "download": [9, 12, 13, 14, 15, 19, 21, 22, 24, 27, 28, 29, 30, 32, 33, 35, 39, 40, 41], "exampl": [9, 15, 19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "virtual": 9, "environ": 9, "activ": 9, "your": 9, "5": [9, 12, 13, 14], "test": [9, 12, 13, 14], "export": [10, 11, 12, 13, 14, 15, 16, 17, 18, 27, 29, 30, 39, 40, 41], "state_dict": [10, 27, 29, 30, 39, 40, 41], "when": [10, 16, 17], "us": [10, 16, 17, 27, 29, 30, 39, 40, 41], "run": 10, "py": 10, "ncnn": [11, 12, 13, 14], "convemform": 12, "transduc": [12, 13, 14, 21, 27, 39, 40, 41], "pnnx": [12, 13, 14], "via": [12, 13, 14], "torch": [12, 13, 14, 16, 17, 27, 29, 30, 39, 40, 41], "jit": [12, 13, 14, 16, 17, 27, 29, 30, 39, 40, 41], "trace": [12, 13, 14, 17, 39, 41], "torchscript": [12, 13, 14], "6": [12, 13, 14], "modifi": [12, 13, 14, 21], "encod": [12, 13, 14], "sherpa": [12, 13, 14, 15, 27, 40, 41], "7": [12, 13], "option": [12, 13, 19, 22, 24, 27, 29, 30, 39, 40, 41], "int8": [12, 13], "quantiz": [12, 13], "lstm": [13, 22, 28, 33, 39], "stream": [14, 23, 36, 37, 40, 41], "zipform": [14, 29, 30, 41], "onnx": 15, "sound": 15, "script": [16, 27, 29, 30, 40, 41], "conform": [19, 24, 37], "ctc": [19, 22, 24, 28, 29, 32, 33, 35], "configur": [19, 22, 24, 27, 29, 30, 39, 40, 41], "log": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "usag": [19, 21, 22, 24, 27, 29, 30, 39, 40, 41], "case": [19, 21, 22, 24], "kaldifeat": [19, 21, 22, 24, 28, 32, 33, 35], "hlg": [19, 22, 24], "attent": [19, 24], "rescor": [19, 24], "colab": [19, 21, 22, 24, 28, 32, 33, 35], "notebook": [19, 21, 22, 24, 28, 32, 33, 35], "deploy": [19, 24], "c": [19, 24], "aishel": 20, "stateless": 21, "The": 21, "loss": 21, "todo": 21, "greedi": 21, "search": 21, "beam": 21, "tdnn": [22, 28, 32, 33, 35], "non": 23, "asr": [23, 36], "lm": 24, "comput": 24, "wer": 24, "n": 24, "gram": 24, "distil": 25, "hubert": 25, "codebook": 25, "index": 25, "librispeech": [26, 38], "prune": [27, 40], "statelessx": [27, 40], "pretrain": [27, 29, 30, 39, 40, 41], "deploi": [27, 40, 41], "infer": [28, 32, 33, 35], "blank": 29, "skip": 29, "mmi": 30, "timit": 31, "ligru": 32, "yesno": 34, "introduct": 37, "emform": 37, "which": 39, "simul": [40, 41], "real": [40, 41], "tabl": 42}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [21, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [19, "training"], [21, "training"], [22, "training"], [24, "training"], [25, "training"], [27, "training"], [28, "training"], [29, "training"], [30, "training"], [32, "training"], [33, "training"], [35, "training"], [39, "training"], [40, "training"], [41, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [19, "decoding"], [21, "decoding"], [22, "decoding"], [24, "decoding"], [25, "decoding"], [27, "decoding"], [28, "decoding"], [29, "decoding"], [30, "decoding"], [32, "decoding"], [33, "decoding"], [35, "decoding"], [39, "decoding"], [40, "decoding"], [41, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "Frequently Asked Questions (FAQs)": [[4, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[4, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[4, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[4, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "Huggingface": [[5, "huggingface"]], "Pre-trained models": [[6, "pre-trained-models"]], "Huggingface spaces": [[7, "huggingface-spaces"]], "YouTube Video": [[7, "youtube-video"], [9, "youtube-video"]], "Icefall": [[8, "icefall"]], "Contents:": [[8, null]], "Installation": [[9, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[9, "install-cuda-toolkit-and-cudnn"]], "(1) Install PyTorch and torchaudio": [[9, "install-pytorch-and-torchaudio"]], "(2) Install k2": [[9, "install-k2"]], "(3) Install lhotse": [[9, "install-lhotse"]], "(4) Download icefall": [[9, "download-icefall"]], "Installation example": [[9, "installation-example"]], "(1) Create a virtual environment": [[9, "create-a-virtual-environment"]], "(2) Activate your virtual environment": [[9, "activate-your-virtual-environment"]], "(3) Install k2": [[9, "id1"]], "(4) Install lhotse": [[9, "id2"]], "(5) Download icefall": [[9, "id3"]], "Test Your Installation": [[9, "test-your-installation"]], "Data preparation": [[9, "data-preparation"], [19, "data-preparation"], [22, "data-preparation"], [24, "data-preparation"], [25, "data-preparation"], [27, "data-preparation"], [28, "data-preparation"], [29, "data-preparation"], [30, "data-preparation"], [32, "data-preparation"], [33, "data-preparation"], [35, "data-preparation"], [39, "data-preparation"], [40, "data-preparation"], [41, "data-preparation"]], "Export model.state_dict()": [[10, "export-model-state-dict"], [27, "export-model-state-dict"], [29, "export-model-state-dict"], [30, "export-model-state-dict"], [39, "export-model-state-dict"], [40, "export-model-state-dict"], [41, "export-model-state-dict"]], "When to use it": [[10, "when-to-use-it"], [16, "when-to-use-it"], [17, "when-to-use-it"]], "How to export": [[10, "how-to-export"], [16, "how-to-export"], [17, "how-to-export"]], "How to use the exported model": [[10, "how-to-use-the-exported-model"], [16, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[10, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[11, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[12, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[12, "download-the-pre-trained-model"], [13, "download-the-pre-trained-model"], [14, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[12, "install-ncnn-and-pnnx"], [13, "install-ncnn-and-pnnx"], [14, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[12, "export-the-model-via-torch-jit-trace"], [13, "export-the-model-via-torch-jit-trace"], [14, "export-the-model-via-torch-jit-trace"]], "4. Export torchscript model via pnnx": [[12, "export-torchscript-model-via-pnnx"], [13, "export-torchscript-model-via-pnnx"], [14, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[12, "test-the-exported-models-in-icefall"], [13, "test-the-exported-models-in-icefall"], [14, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[12, "modify-the-exported-encoder-for-sherpa-ncnn"], [13, "modify-the-exported-encoder-for-sherpa-ncnn"], [14, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[12, "optional-int8-quantization-with-sherpa-ncnn"], [13, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[13, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[14, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[15, "export-to-onnx"]], "sherpa-onnx": [[15, "sherpa-onnx"]], "Example": [[15, "example"]], "Download the pre-trained model": [[15, "download-the-pre-trained-model"], [19, "download-the-pre-trained-model"], [21, "download-the-pre-trained-model"], [22, "download-the-pre-trained-model"], [24, "download-the-pre-trained-model"], [28, "download-the-pre-trained-model"], [32, "download-the-pre-trained-model"], [33, "download-the-pre-trained-model"], [35, "download-the-pre-trained-model"]], "Export the model to ONNX": [[15, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[15, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[16, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[17, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[17, "how-to-use-the-exported-models"]], "Model export": [[18, "model-export"]], "Conformer CTC": [[19, "conformer-ctc"], [24, "conformer-ctc"]], "Configurable options": [[19, "configurable-options"], [22, "configurable-options"], [24, "configurable-options"], [27, "configurable-options"], [29, "configurable-options"], [30, "configurable-options"], [39, "configurable-options"], [40, "configurable-options"], [41, "configurable-options"]], "Pre-configured options": [[19, "pre-configured-options"], [22, "pre-configured-options"], [24, "pre-configured-options"], [27, "pre-configured-options"], [29, "pre-configured-options"], [30, "pre-configured-options"], [39, "pre-configured-options"], [40, "pre-configured-options"], [41, "pre-configured-options"]], "Training logs": [[19, "training-logs"], [21, "training-logs"], [22, "training-logs"], [24, "training-logs"], [27, "training-logs"], [29, "training-logs"], [30, "training-logs"], [39, "training-logs"], [40, "training-logs"], [41, "training-logs"]], "Usage examples": [[19, "usage-examples"], [21, "usage-examples"], [22, "usage-examples"], [24, "usage-examples"]], "Case 1": [[19, "case-1"], [21, "case-1"], [22, "case-1"], [24, "case-1"]], "Case 2": [[19, "case-2"], [21, "case-2"], [22, "case-2"], [24, "case-2"]], "Case 3": [[19, "case-3"], [21, "case-3"], [24, "case-3"]], "Pre-trained Model": [[19, "pre-trained-model"], [21, "pre-trained-model"], [22, "pre-trained-model"], [24, "pre-trained-model"], [28, "pre-trained-model"], [32, "pre-trained-model"], [33, "pre-trained-model"], [35, "pre-trained-model"]], "Install kaldifeat": [[19, "install-kaldifeat"], [21, "install-kaldifeat"], [22, "install-kaldifeat"], [24, "install-kaldifeat"], [28, "install-kaldifeat"], [32, "install-kaldifeat"], [33, "install-kaldifeat"]], "Usage": [[19, "usage"], [21, "usage"], [22, "usage"], [24, "usage"]], "CTC decoding": [[19, "ctc-decoding"], [24, "ctc-decoding"], [24, "id2"]], "HLG decoding": [[19, "hlg-decoding"], [19, "id2"], [22, "hlg-decoding"], [24, "hlg-decoding"], [24, "id3"]], "HLG decoding + attention decoder rescoring": [[19, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[19, "colab-notebook"], [21, "colab-notebook"], [22, "colab-notebook"], [24, "colab-notebook"], [28, "colab-notebook"], [32, "colab-notebook"], [33, "colab-notebook"], [35, "colab-notebook"]], "Deployment with C++": [[19, "deployment-with-c"], [24, "deployment-with-c"]], "aishell": [[20, "aishell"]], "Stateless Transducer": [[21, "stateless-transducer"]], "The Model": [[21, "the-model"]], "The Loss": [[21, "the-loss"]], "Todo": [[21, "id1"]], "Greedy search": [[21, "greedy-search"]], "Beam search": [[21, "beam-search"]], "Modified Beam search": [[21, "modified-beam-search"]], "TDNN-LSTM CTC": [[22, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[23, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[24, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[24, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[24, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[24, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[24, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[25, "distillation-with-hubert"]], "Codebook index preparation": [[25, "codebook-index-preparation"]], "LibriSpeech": [[26, "librispeech"], [38, "librispeech"]], "Pruned transducer statelessX": [[27, "pruned-transducer-statelessx"], [40, "pruned-transducer-statelessx"]], "Usage example": [[27, "usage-example"], [29, "usage-example"], [30, "usage-example"], [39, "usage-example"], [40, "usage-example"], [41, "usage-example"]], "Export Model": [[27, "export-model"], [40, "export-model"], [41, "export-model"]], "Export model using torch.jit.script()": [[27, "export-model-using-torch-jit-script"], [29, "export-model-using-torch-jit-script"], [30, "export-model-using-torch-jit-script"], [40, "export-model-using-torch-jit-script"], [41, "export-model-using-torch-jit-script"]], "Download pretrained models": [[27, "download-pretrained-models"], [29, "download-pretrained-models"], [30, "download-pretrained-models"], [39, "download-pretrained-models"], [40, "download-pretrained-models"], [41, "download-pretrained-models"]], "Deploy with Sherpa": [[27, "deploy-with-sherpa"], [40, "deploy-with-sherpa"], [41, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[28, "tdnn-lstm-ctc"], [33, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[28, "inference-with-a-pre-trained-model"], [32, "inference-with-a-pre-trained-model"], [33, "inference-with-a-pre-trained-model"], [35, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[29, "zipformer-ctc-blank-skip"]], "Export models": [[29, "export-models"], [30, "export-models"], [39, "export-models"]], "Zipformer MMI": [[30, "zipformer-mmi"]], "TIMIT": [[31, "timit"]], "TDNN-LiGRU-CTC": [[32, "tdnn-ligru-ctc"]], "YesNo": [[34, "yesno"]], "TDNN-CTC": [[35, "tdnn-ctc"]], "Download kaldifeat": [[35, "download-kaldifeat"]], "Streaming ASR": [[36, "streaming-asr"]], "Introduction": [[37, "introduction"]], "Streaming Conformer": [[37, "streaming-conformer"]], "Streaming Emformer": [[37, "streaming-emformer"]], "LSTM Transducer": [[39, "lstm-transducer"]], "Which model to use": [[39, "which-model-to-use"]], "Export model using torch.jit.trace()": [[39, "export-model-using-torch-jit-trace"], [41, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[40, "simulate-streaming-decoding"], [41, "simulate-streaming-decoding"]], "Real streaming decoding": [[40, "real-streaming-decoding"], [41, "real-streaming-decoding"]], "Zipformer Transducer": [[41, "zipformer-transducer"]], "Recipes": [[42, "recipes"]], "Table of Contents": [[42, null]]}, "indexentries": {}})