Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "decoding-with-langugage-models/LODR", "decoding-with-langugage-models/index", "decoding-with-langugage-models/rescoring", "decoding-with-langugage-models/shallow-fusion", "docker/index", "docker/intro", "faqs", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-ncnn-conv-emformer", "model-export/export-ncnn-lstm", "model-export/export-ncnn-zipformer", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "decoding-with-langugage-models/LODR.rst", "decoding-with-langugage-models/index.rst", "decoding-with-langugage-models/rescoring.rst", "decoding-with-langugage-models/shallow-fusion.rst", "docker/index.rst", "docker/intro.rst", "faqs.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-ncnn-conv-emformer.rst", "model-export/export-ncnn-lstm.rst", "model-export/export-ncnn-zipformer.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "LODR for RNN Transducer", "Decoding with language models", "LM rescoring for Transducer", "Shallow fusion for Transducer", "Docker", "Introduction", "Frequently Asked Questions (FAQs)", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export ConvEmformer transducer models to ncnn", "Export LSTM transducer models to ncnn", "Export streaming Zipformer transducer models to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47, 48], "us": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 30, 31, 34, 38, 39, 41, 43], "tool": [0, 10, 15, 18], "make": [0, 1, 3, 18, 19, 20, 25, 27, 30, 43], "consist": [0, 27, 33, 45, 46, 47], "possibl": [0, 2, 3, 25, 30], "black": 0, "format": [0, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "flake8": 0, "check": [0, 15, 30], "qualiti": [0, 26], "isort": 0, "sort": [0, 15], "import": [0, 9, 10, 15, 18, 46, 47], "The": [0, 1, 2, 4, 7, 9, 10, 13, 15, 16, 18, 19, 20, 25, 26, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "version": [0, 9, 14, 15, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 38, 39, 46], "abov": [0, 4, 6, 7, 10, 16, 18, 19, 20, 21, 25, 26, 27, 28, 30, 33, 35, 36, 41, 43, 45, 46, 47], "ar": [0, 1, 3, 4, 5, 6, 7, 9, 10, 15, 16, 18, 19, 20, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47, 48], "22": [0, 9, 15, 18, 19, 30, 38, 39, 41], "3": [0, 4, 6, 7, 9, 10, 14, 16, 17, 21, 24, 28, 31, 33, 34, 35, 36, 41, 45, 46, 47], "0": [0, 1, 4, 6, 7, 9, 14, 16, 18, 19, 20, 21, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "5": [0, 7, 17, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "4": [0, 4, 5, 6, 7, 9, 10, 14, 16, 17, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "10": [0, 7, 9, 14, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "1": [0, 4, 6, 7, 9, 14, 16, 17, 21, 22, 23, 24, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "after": [0, 1, 6, 9, 13, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "run": [0, 2, 8, 10, 13, 14, 15, 18, 19, 20, 21, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "command": [0, 1, 4, 6, 7, 9, 10, 15, 16, 18, 19, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "git": [0, 4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 34, 38, 39, 41], "clone": [0, 4, 6, 7, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 34, 38, 39, 41], "http": [0, 1, 2, 4, 6, 7, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "github": [0, 2, 6, 9, 12, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "com": [0, 2, 6, 9, 12, 13, 15, 16, 18, 19, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "k2": [0, 2, 9, 10, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 45, 46, 47], "fsa": [0, 2, 9, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27, 30, 33, 35, 36, 45, 46, 47], "icefal": [0, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 16, 17, 21, 22, 23, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47, 48], "cd": [0, 1, 2, 9, 10, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "pip": [0, 1, 6, 10, 15, 18, 21, 27], "instal": [0, 1, 4, 6, 10, 11, 13, 14, 16, 17, 21, 24, 31, 33, 35, 36, 41, 45, 46, 47], "pre": [0, 3, 4, 6, 7, 8, 9, 11, 13, 14, 15, 17, 24, 31], "commit": [0, 15], "whenev": 0, "you": [0, 1, 2, 4, 6, 7, 8, 9, 10, 12, 13, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "automat": [0, 13, 31], "hook": 0, "invok": 0, "fail": 0, "If": [0, 2, 4, 6, 7, 8, 9, 10, 13, 18, 19, 20, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "ani": [0, 4, 6, 7, 15, 25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46], "your": [0, 1, 2, 4, 6, 7, 9, 11, 13, 14, 18, 19, 20, 21, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "wa": [0, 16, 30, 34], "success": [0, 15, 18, 19], "pleas": [0, 1, 2, 4, 6, 7, 9, 10, 13, 15, 17, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "fix": [0, 9, 10, 18, 19, 20, 30], "issu": [0, 4, 6, 7, 10, 15, 18, 19, 30, 31, 46, 47], "report": [0, 9, 10, 31], "some": [0, 1, 4, 6, 16, 18, 19, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "i": [0, 1, 2, 4, 7, 9, 10, 13, 15, 16, 17, 18, 19, 20, 21, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "e": [0, 2, 4, 5, 6, 7, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "modifi": [0, 17, 24, 25, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "file": [0, 2, 9, 13, 14, 16, 18, 19, 20, 22, 23, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "place": [0, 15, 16, 27, 30, 34], "so": [0, 4, 6, 7, 9, 13, 14, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "statu": 0, "failur": 0, "see": [0, 1, 6, 7, 13, 15, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "which": [0, 2, 4, 6, 7, 9, 13, 15, 16, 18, 19, 20, 21, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 46, 47], "ha": [0, 2, 14, 15, 17, 18, 19, 20, 21, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 43, 45, 46, 47], "been": [0, 15, 17, 18, 19, 20, 27], "befor": [0, 1, 15, 16, 18, 19, 20, 21, 22, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "further": [0, 4, 6, 7], "chang": [0, 4, 6, 7, 10, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "all": [0, 9, 12, 13, 16, 18, 19, 20, 22, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "again": [0, 18, 19, 41], "should": [0, 2, 4, 6, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "succe": 0, "thi": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47, 48], "time": [0, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "succeed": 0, "want": [0, 4, 6, 7, 15, 16, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "can": [0, 1, 2, 4, 6, 7, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "do": [0, 2, 4, 6, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "Or": 0, "without": [0, 4, 6, 7, 9, 11, 13, 25, 30], "your_changed_fil": 0, "py": [0, 2, 4, 6, 7, 9, 10, 15, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 4, 6, 7, 8, 9, 12, 13, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "prepar": [1, 3, 4, 8, 16], "environ": [1, 10, 18, 19, 20, 25, 26, 27, 28, 30, 31, 33, 34, 38, 39, 41, 46, 47], "doc": [1, 16, 43], "r": [1, 15, 18, 19, 20, 38, 39], "requir": [1, 4, 6, 15, 20, 31, 46, 47], "txt": [1, 4, 9, 15, 18, 19, 20, 21, 25, 27, 28, 30, 34, 38, 39, 41], "set": [1, 4, 6, 7, 10, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46, 47], "up": [1, 15, 16, 18, 19, 20, 25, 28, 30, 31, 33, 34, 35, 36, 46, 47], "readi": [1, 25, 30, 31], "refer": [1, 2, 6, 7, 15, 16, 17, 18, 19, 20, 22, 23, 25, 27, 28, 30, 33, 34, 35, 38, 39, 41, 43, 46, 47], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 15, 16, 18, 19, 20, 25, 27, 30], "local": [1, 9, 15, 33, 35, 36, 45, 46, 47], "preview": 1, "what": [1, 2, 15, 18, 19, 20, 27, 43], "look": [1, 2, 4, 6, 7, 12, 15, 18, 19, 20, 25, 27, 28, 30, 31], "like": [1, 2, 9, 13, 18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 41, 43, 45, 46], "publish": [1, 16, 26], "html": [1, 2, 10, 15, 17, 18, 19, 20, 21, 22, 23, 33, 45, 46, 47], "gener": [1, 6, 9, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "view": [1, 8, 14, 18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 41, 45, 46, 47], "follow": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "python3": [1, 9, 10, 15, 19, 20], "m": [1, 15, 18, 19, 20, 27, 33, 35, 36, 38, 39, 45, 46, 47], "server": [1, 13, 45], "It": [1, 2, 6, 7, 9, 11, 15, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "print": [1, 15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "serv": [1, 33, 35, 36, 45, 46, 47], "port": [1, 31, 33, 35, 36, 45, 46, 47], "8000": [1, 41], "open": [1, 4, 6, 7, 9, 14, 16, 18, 19, 20, 26, 27, 30, 31], "browser": [1, 11, 13, 33, 35, 36, 45, 46, 47], "go": [1, 7, 25, 27, 30, 33, 35, 36, 45, 46, 47], "read": [2, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "code": [2, 3, 8, 10, 14, 15, 18, 19, 20, 25, 30, 31, 33, 34, 38, 39, 41, 43, 46, 47], "style": [2, 3, 14], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 9, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 30, 33, 35, 36, 45, 46, 47], "recommend": [2, 6, 7, 15, 25, 27, 28, 30, 31, 33, 46, 47], "test": [2, 4, 9, 14, 16, 17, 24, 25, 27, 28, 30, 31, 34, 35, 38, 39], "valid": [2, 15, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "dataset": [2, 10, 15, 16, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "lhots": [2, 9, 14, 16, 18, 19, 20, 25, 27, 30], "readthedoc": [2, 15], "io": [2, 15, 17, 18, 19, 20, 21, 22, 23, 33, 45, 46, 47], "en": [2, 15, 18], "latest": [2, 9, 13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "index": [2, 15, 17, 18, 19, 20, 21, 22, 23, 45, 46, 47], "yesno": [2, 8, 10, 14, 15, 29, 41, 48], "veri": [2, 3, 7, 18, 19, 20, 27, 38, 39, 41, 46, 47], "good": [2, 7], "exampl": [2, 13, 14, 16, 18, 19, 20, 22, 23, 24, 31, 34, 38, 39, 41], "speech": [2, 13, 14, 15, 17, 26, 27, 41, 48], "pull": [2, 4, 6, 7, 9, 18, 19, 20, 21, 25, 27, 30, 43], "380": [2, 18, 39], "show": [2, 4, 6, 7, 9, 13, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "add": [2, 18, 19, 20, 25, 27, 28, 46, 48], "new": [2, 3, 9, 13, 15, 18, 19, 20, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 41, 45, 46, 47], "suppos": [2, 9, 46, 47], "would": [2, 16, 18, 19, 20, 30, 34, 46, 47], "name": [2, 9, 10, 16, 18, 19, 20, 21, 25, 27, 33, 35, 36, 46, 47], "foo": [2, 23, 25, 30, 33, 35, 36, 45, 46, 47], "eg": [2, 9, 10, 12, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "mkdir": [2, 18, 19, 25, 27, 28, 30, 34, 38, 39, 41], "p": [2, 4, 15, 18, 19, 27, 38, 39], "asr": [2, 4, 6, 7, 9, 10, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47, 48], "touch": 2, "sh": [2, 9, 15, 16, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "chmod": 2, "x": [2, 4, 20, 43], "simpl": [2, 15, 27], "own": [2, 31, 33, 46, 47], "otherwis": [2, 18, 19, 20, 25, 27, 30, 31, 33, 35, 36, 45, 46, 47], "librispeech": [2, 4, 6, 7, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 29, 30, 31, 33, 34, 35, 36, 42, 43, 45, 46, 47, 48], "assum": [2, 4, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 31, 33, 34, 38, 39, 41, 45, 46, 47], "fanci": 2, "call": [2, 10, 21, 31], "bar": [2, 23, 25, 30, 33, 35, 36, 45, 46, 47], "organ": 2, "wai": [2, 3, 24, 33, 35, 36, 43, 45, 46, 47], "readm": [2, 25, 27, 28, 30, 34, 38, 39, 41], "md": [2, 12, 16, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "asr_datamodul": [2, 9, 10, 15], "pretrain": [2, 4, 6, 7, 16, 18, 19, 20, 21, 23, 25, 27, 28, 30, 34, 38, 39, 41], "For": [2, 4, 6, 7, 10, 12, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "instanc": [2, 10, 12, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "tdnn": [2, 9, 10, 15, 26, 29, 32, 37, 40], "its": [2, 4, 16, 17, 18, 19, 20, 23, 27, 35], "directori": [2, 9, 14, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "structur": [2, 20], "descript": [2, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "contain": [2, 8, 14, 16, 17, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47, 48], "inform": [2, 4, 6, 15, 16, 25, 27, 28, 30, 33, 34, 35, 38, 39, 41, 43, 45, 46, 47], "g": [2, 4, 5, 6, 7, 15, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "wer": [2, 9, 15, 16, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "etc": [2, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "provid": [2, 13, 15, 16, 17, 18, 19, 20, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47, 48], "pytorch": [2, 10, 15, 18, 19, 20, 27], "dataload": [2, 15], "take": [2, 7, 9, 16, 31, 33, 41, 46, 47], "input": [2, 16, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39, 41, 43], "checkpoint": [2, 4, 6, 7, 15, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "save": [2, 15, 16, 19, 20, 22, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "dure": [2, 4, 5, 7, 10, 13, 16, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "stage": [2, 15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "": [2, 4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 22, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "definit": [2, 18, 19], "neural": [2, 4, 6, 7, 25, 30], "network": [2, 25, 27, 30, 33, 35, 36, 45, 46, 47], "script": [2, 6, 7, 14, 15, 23, 24, 25, 27, 28, 30, 31, 34, 38, 39, 41, 45], "infer": [2, 16, 18, 19], "tdnn_lstm_ctc": [2, 28, 34, 39], "conformer_ctc": [2, 25, 30], "get": [2, 9, 13, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 41, 43, 45, 46, 47], "feel": [2, 31, 45], "result": [2, 4, 7, 9, 12, 13, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "everi": [2, 16, 33, 35, 36, 45, 46, 47], "kept": [2, 33, 46, 47], "self": [2, 17, 20, 43], "toler": 2, "duplic": 2, "among": [2, 15], "differ": [2, 15, 18, 19, 20, 21, 25, 26, 30, 31, 33, 43, 45, 46, 47], "invoc": [2, 18, 19], "help": [2, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "blob": [2, 12, 15, 16, 23, 33, 35, 36, 45, 46, 47], "master": [2, 6, 9, 12, 15, 16, 19, 20, 22, 23, 27, 31, 33, 35, 36, 45, 46, 47], "transform": [2, 6, 7, 25, 30, 45], "conform": [2, 22, 26, 27, 29, 32, 33, 35, 45, 46, 47], "base": [2, 4, 7, 20, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "lstm": [2, 17, 23, 24, 26, 29, 32, 37, 42, 44], "attent": [2, 20, 27, 28, 31, 43, 46, 47], "lm": [2, 4, 7, 9, 14, 15, 27, 33, 34, 38, 39, 41, 46, 47], "rescor": [2, 14, 28, 34, 36, 38, 39, 41], "demonstr": [2, 11, 13, 16, 21], "consid": [2, 4, 20], "colab": [2, 15], "notebook": [2, 15], "welcom": 3, "There": [3, 4, 18, 19, 20, 21, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "mani": [3, 15, 46, 47], "two": [3, 4, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "them": [3, 5, 6, 11, 12, 13, 18, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "To": [3, 4, 6, 7, 13, 15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "document": [3, 14, 16, 17, 18, 19, 20, 21, 36], "repositori": [3, 9, 18, 19, 20, 21], "recip": [3, 4, 6, 7, 9, 12, 14, 15, 16, 21, 25, 27, 28, 30, 31, 33, 34, 38, 39, 41, 43, 45, 46, 47], "In": [3, 4, 6, 10, 13, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 30, 31, 34, 38, 39, 41, 43], "page": [3, 13, 22, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47, 48], "describ": [3, 5, 8, 9, 11, 16, 18, 19, 21, 22, 23, 24, 25, 27, 28, 30, 33, 34, 38, 39, 46, 47], "how": [3, 4, 5, 6, 7, 8, 9, 11, 13, 14, 15, 18, 19, 20, 21, 24, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "creat": [3, 4, 6, 7, 14, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46], "data": [3, 4, 6, 7, 8, 16, 18, 19, 20, 21, 22, 23, 26], "train": [3, 4, 6, 7, 8, 10, 11, 13, 14, 16, 17, 22, 23, 24, 43], "decod": [3, 4, 8, 10, 13, 14, 18, 19, 20, 23, 24], "model": [3, 4, 6, 7, 9, 11, 13, 14, 15, 17, 31, 43], "As": [4, 5, 6, 7, 18, 27, 30, 31], "type": [4, 6, 7, 9, 15, 16, 18, 19, 20, 25, 27, 30, 33, 35, 36, 41, 43, 45, 46, 47], "e2": [4, 7, 15], "usual": [4, 6, 7, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "an": [4, 5, 6, 7, 9, 13, 15, 16, 18, 19, 20, 21, 22, 23, 25, 26, 27, 30, 31, 33, 36, 41, 45, 46, 47], "intern": [4, 5], "languag": [4, 7, 13, 14, 25, 27, 28], "learn": [4, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "level": [4, 5], "corpu": [4, 6, 7, 26], "real": 4, "life": 4, "scenario": 4, "often": [4, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "mismatch": [4, 46], "between": [4, 7, 33, 46, 47], "target": [4, 13, 15], "space": [4, 11, 14], "problem": [4, 6, 7, 15, 31], "when": [4, 6, 9, 10, 13, 18, 19, 20, 24, 27, 30, 31, 33, 35, 36, 46, 47], "act": 4, "against": [4, 15], "extern": [4, 5, 6, 7], "tutori": [4, 6, 7, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 45, 46, 47], "low": [4, 18, 19], "order": [4, 15, 18, 19, 20, 25, 28, 30, 34, 38, 39], "densiti": 4, "ratio": 4, "allevi": 4, "effect": [4, 7, 20], "improv": [4, 5, 6, 7, 27], "perform": [4, 6, 7, 17, 27, 31, 46], "languga": 4, "integr": [4, 13], "pruned_transducer_stateless7_stream": [4, 6, 7, 20, 21, 47], "stream": [4, 6, 7, 14, 17, 18, 19, 21, 24, 25, 30, 38, 39, 45, 48], "howev": [4, 6, 7, 16, 19, 31], "easili": [4, 6, 7, 25, 28, 30], "appli": [4, 6, 7, 27, 43], "other": [4, 7, 16, 19, 20, 21, 27, 30, 31, 33, 34, 38, 39, 41, 43, 46, 47, 48], "encount": [4, 6, 7, 10, 15, 20, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "here": [4, 6, 7, 16, 18, 19, 20, 25, 27, 28, 30, 31, 34, 43, 46], "simplic": [4, 6, 7], "same": [4, 6, 7, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "domain": [4, 6, 7], "gigaspeech": [4, 6, 7, 12, 22, 45], "first": [4, 6, 9, 10, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "let": [4, 6, 7, 15, 18, 19, 20, 25, 30], "background": 4, "predecessor": 4, "dr": 4, "propos": [4, 27, 43, 47], "address": [4, 9, 13, 15, 16, 18, 19, 20, 27, 33, 36, 45, 46, 47], "sourc": [4, 15, 16, 18, 19, 20, 25, 26, 27, 30], "acoust": [4, 46, 47], "similar": [4, 5, 31, 35, 46, 47], "deriv": 4, "formular": 4, "bay": 4, "theorem": 4, "text": [4, 6, 7, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "score": [4, 5, 7, 25, 30, 33, 46, 47], "left": [4, 18, 20, 27, 46, 47], "y_u": 4, "mathit": 4, "y": 4, "right": [4, 18, 27, 43, 46], "log": [4, 9, 10, 15, 18, 19, 20, 34, 38, 39, 41], "y_": 4, "u": [4, 15, 18, 19, 20, 25, 27, 28, 30, 31, 41], "lambda_1": 4, "p_": 4, "lambda_2": 4, "where": [4, 10, 46], "weight": [4, 25, 28, 30, 35, 36, 45], "respect": 4, "onli": [4, 6, 8, 9, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47, 48], "compar": [4, 18, 19, 20, 46], "shallow": [4, 14], "fusion": [4, 14], "subtract": [4, 5], "work": [4, 9, 18, 19, 20, 30], "treat": [4, 19, 20], "predictor": 4, "joiner": [4, 18, 19, 20, 21, 23, 27, 33, 45, 46, 47], "weak": 4, "captur": 4, "therefor": [4, 10], "n": [4, 5, 6, 15, 25, 31, 33, 35, 36, 38, 39, 45, 46, 47], "gram": [4, 6, 15, 25, 27, 28, 33, 34, 36, 38, 39, 46, 47], "approxim": [4, 5], "ilm": 4, "lead": [4, 7], "formula": 4, "rnnt": [4, 33, 46, 47], "bi": [4, 6], "addit": 4, "estim": 4, "comar": 4, "li": 4, "choic": 4, "accord": 4, "origin": [4, 5], "paper": [4, 5, 31, 33, 45, 46, 47], "achiev": [4, 6, 7, 43], "both": [4, 33, 35, 36, 43, 45, 46, 47], "intra": 4, "cross": 4, "much": [4, 18, 19], "faster": [4, 6], "evalu": 4, "now": [4, 6, 9, 15, 18, 19, 20, 25, 30, 31, 33, 34, 35, 36, 38, 39, 45, 46, 47], "illustr": [4, 6, 7], "purpos": [4, 6, 7, 18, 19], "from": [4, 6, 7, 9, 10, 11, 13, 15, 16, 18, 19, 20, 21, 25, 26, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "link": [4, 6, 7, 12, 15, 16, 17, 33, 35, 36, 45, 46, 47], "scratch": [4, 6, 7, 33, 35, 36, 45, 46, 47], "prune": [4, 6, 7, 16, 20, 21, 27, 29, 31, 32, 42, 43, 44, 45, 47], "statelessx": [4, 6, 7, 29, 31, 32, 42, 43, 44], "initi": [4, 6, 7, 9, 25, 28], "step": [4, 6, 7, 15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46, 47], "download": [4, 6, 7, 8, 10, 13, 14, 17, 24, 26, 31], "git_lfs_skip_smudg": [4, 6, 7, 18, 19, 20, 21], "huggingfac": [4, 6, 7, 12, 14, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 34, 35, 36, 38, 39, 41, 45], "co": [4, 6, 7, 12, 13, 15, 16, 18, 19, 20, 21, 25, 26, 27, 28, 30, 34, 35, 36, 38, 39, 41, 45], "zengwei": [4, 6, 7, 18, 20, 21, 36, 45], "stateless7": [4, 6, 7, 20, 21], "2022": [4, 6, 7, 16, 18, 19, 20, 21, 27, 33, 35, 36, 45, 46], "12": [4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 33, 35, 36, 38, 41, 45, 46, 47], "29": [4, 6, 7, 15, 20, 21, 25, 27, 28, 30, 34, 35, 38, 39], "pushd": [4, 6, 7, 21], "exp": [4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "lf": [4, 6, 7, 16, 18, 19, 20, 21, 25, 27, 28, 30, 34, 36, 38, 39, 41], "includ": [4, 6, 7, 18, 19, 20, 21, 33, 35, 36, 45, 46, 47], "pt": [4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "ln": [4, 6, 7, 9, 16, 18, 19, 20, 21, 25, 30, 33, 35, 36, 45, 46, 47], "epoch": [4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 22, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "99": [4, 6, 7, 15, 18, 19, 20, 21], "symbol": [4, 5, 6, 7, 15, 27, 33, 46, 47], "load": [4, 6, 7, 9, 15, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "done": [4, 6, 7, 9, 15, 16, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "via": [4, 6, 7, 15, 17, 22, 23, 24], "exp_dir": [4, 6, 7, 9, 15, 18, 19, 20, 27, 30, 31, 33, 35, 36, 46, 47], "avg": [4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 22, 23, 27, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "averag": [4, 6, 7, 9, 15, 16, 18, 19, 20, 21, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "fals": [4, 6, 7, 9, 15, 16, 18, 19, 20, 25, 27, 30, 31], "dir": [4, 6, 7, 16, 18, 19, 20, 21, 22, 23, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "bpe": [4, 5, 6, 7, 16, 18, 19, 20, 21, 22, 23, 30, 33, 35, 36, 45, 46, 47], "lang_bpe_500": [4, 6, 7, 16, 18, 19, 20, 21, 22, 23, 30, 33, 35, 36, 45, 46, 47], "max": [4, 6, 7, 15, 16, 18, 19, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "durat": [4, 6, 7, 16, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "600": [4, 6, 7, 15, 16, 30, 33, 35, 45, 46, 47], "chunk": [4, 6, 7, 18, 20, 21, 46, 47], "len": [4, 6, 7, 20, 21, 47], "32": [4, 6, 7, 15, 18, 19, 20, 21, 25, 27, 28, 47], "method": [4, 7, 13, 16, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 45, 46, 47], "modified_beam_search": [4, 5, 6, 7, 13, 27, 31, 33, 35, 45, 46, 47], "clean": [4, 9, 15, 20, 25, 27, 30, 31, 33, 34, 35, 36, 45, 46, 47], "beam_size_4": [4, 6, 7], "11": [4, 6, 7, 9, 10, 15, 18, 19, 21, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "best": [4, 5, 6, 7, 18, 19, 20, 25, 28, 30], "7": [4, 6, 7, 9, 15, 16, 17, 20, 24, 25, 28, 30, 33, 34, 38, 39, 45, 46], "93": [4, 6, 7], "Then": [4, 6], "necessari": [4, 31], "note": [4, 5, 6, 7, 10, 16, 18, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "960": [4, 30, 33, 35, 36, 45, 46, 47], "hour": [4, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "ezerhouni": [4, 6, 7], "popd": [4, 6, 7, 21], "marcoyang": [4, 6], "librispeech_bigram": [4, 6], "2gram": [4, 6], "fst": [4, 15, 27, 41], "modified_beam_search_lm_lodr": 4, "lm_dir": [4, 6, 7, 9, 15, 30], "lm_scale": [4, 6, 7], "42": [4, 9, 15, 19, 25, 30, 41], "lodr_scal": 4, "24": [4, 9, 10, 15, 18, 19, 28, 34, 38, 39, 41], "scale": [4, 6, 7, 18, 19, 25, 30, 31, 34, 36, 38, 39], "embed": [4, 6, 7, 27, 33, 45, 46, 47], "dim": [4, 6, 7, 18, 19, 20, 27, 33, 46], "2048": [4, 6, 7, 16, 18, 19, 20, 27], "hidden": [4, 6, 7, 19, 45], "num": [4, 6, 7, 18, 19, 20, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "layer": [4, 6, 7, 18, 19, 20, 27, 31, 33, 43, 45, 46, 47], "vocab": [4, 6, 7, 30], "500": [4, 6, 7, 16, 18, 19, 20, 27, 30, 36, 45], "token": [4, 18, 19, 20, 21, 25, 27, 28, 30, 34, 38, 39, 41], "ngram": [4, 30, 34, 38, 39], "2": [4, 6, 7, 9, 14, 16, 17, 24, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "extra": [4, 18, 19, 20, 27, 43, 46], "argument": [4, 7, 31, 43], "need": [4, 6, 13, 15, 16, 17, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "given": [4, 9, 15, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 46, 47], "specifi": [4, 7, 10, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "neg": [4, 27], "number": [4, 7, 13, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "obtain": [4, 7, 25, 27, 28, 30, 34, 38, 39], "shown": [4, 7], "below": [4, 7, 9, 15, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46], "61": [4, 6], "6": [4, 6, 7, 9, 10, 17, 24, 25, 27, 30, 33, 34, 38, 39, 45], "74": [4, 6, 15, 16], "recal": 4, "lowest": [4, 33, 35, 36, 45, 46, 47], "77": [4, 6, 7, 15, 30], "08": [4, 6, 7, 9, 20, 30, 34, 36, 38, 39, 41, 45], "inde": 4, "even": [4, 13, 15, 19], "better": [4, 6], "increas": [4, 6, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "8": [4, 6, 7, 9, 10, 15, 16, 18, 19, 20, 25, 27, 30, 31, 33, 34, 35, 36, 41, 45, 46, 47], "45": [4, 6, 15, 18, 20, 25, 27, 30], "38": [4, 6, 15, 18, 25, 27, 30, 38], "23": [4, 6, 9, 10, 15, 18, 19, 20, 25, 27, 28, 30, 38, 39, 41], "section": [5, 8, 9, 10, 11, 15, 16, 21, 22, 23, 24, 25, 30], "langugag": 5, "transduc": [5, 14, 16, 17, 21, 24, 26, 29, 31, 32, 42, 43, 44], "avail": [5, 6, 8, 14, 15, 16, 18, 19, 20, 25, 27, 30, 34, 38, 39, 41, 45], "beam": [5, 16, 45], "search": [5, 6, 7, 12, 13], "realli": [5, 25, 28, 30, 33, 35, 36, 45, 46, 47], "valu": [5, 7, 18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "rnn": [5, 6, 7, 14, 19, 27, 33, 35, 45, 46, 47], "t": [5, 15, 18, 19, 20, 21, 22, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "doe": [5, 18, 19, 20, 25, 27, 30, 41], "modified_beam_search_lm_shallow_fus": [5, 6, 7], "interpol": 5, "also": [5, 6, 7, 11, 12, 15, 16, 17, 18, 19, 20, 21, 23, 25, 27, 28, 30, 33, 35, 36, 41, 43, 45, 46, 47], "known": 5, "modified_beam_search_lodr": [5, 6], "bigram": 5, "backoff": 5, "modified_beam_search_lm_rescor": [5, 6], "hypothes": [5, 6], "rnnlm": [5, 6], "re": [5, 6, 10, 25, 28, 30, 31, 33, 35, 36, 43, 45, 46, 47], "rank": [5, 6], "modified_beam_search_lm_rescore_lodr": [5, 6], "lodr": [5, 14], "commonli": [6, 7, 25, 27, 28, 30, 34, 38, 39, 41], "approach": 6, "incorpor": 6, "unlik": 6, "more": [6, 15, 18, 19, 20, 25, 30, 31, 41, 43, 45, 46], "effici": [6, 7, 33, 46, 47], "than": [6, 15, 16, 19, 25, 27, 28, 30, 33, 34, 35, 36, 41, 45, 46, 47], "sinc": [6, 15, 18, 19, 20, 31, 41, 45], "less": [6, 16, 30, 34, 41, 46, 47], "comput": [6, 15, 16, 18, 19, 20, 25, 27, 28, 31, 33, 34, 36, 38, 39, 41, 45, 46, 47], "gpu": [6, 7, 8, 14, 15, 18, 19, 25, 27, 28, 30, 31, 33, 35, 36, 38, 39, 41, 45, 46, 47], "try": [6, 10, 11, 13, 31, 33, 35, 36, 45, 46, 47], "might": [6, 7, 19, 20, 46, 47], "ideal": [6, 7], "mai": [6, 7, 9, 15, 18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47, 48], "With": [6, 15], "43": [6, 9, 19, 20, 30], "great": 6, "made": [6, 18], "boost": [6, 7], "tabl": [6, 13, 18, 19, 20], "67": [6, 15], "59": [6, 15, 18, 28, 30], "86": 6, "fact": 6, "arpa": [6, 41], "performn": 6, "depend": [6, 15, 25, 30], "kenlm": 6, "kpu": 6, "archiv": 6, "zip": 6, "execut": [6, 7, 18, 25, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "9": [6, 9, 15, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 41, 45, 46, 47], "57": [6, 15, 19, 30, 34], "slightli": 6, "63": [6, 27], "04": [6, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39], "52": [6, 15, 25, 30], "73": 6, "mention": 6, "earlier": 6, "benchmark": [6, 27], "speed": [6, 18, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "132": 6, "95": [6, 26], "177": [6, 15, 16, 19, 20, 27, 28, 30], "96": [6, 15], "210": [6, 38, 39], "262": [6, 7], "62": [6, 7, 15, 30, 34], "65": [6, 7, 15, 18], "352": [6, 7, 30], "58": [6, 7, 10, 15, 30], "488": [6, 7, 18, 19, 20], "400": [6, 9, 26], "610": 6, "870": 6, "156": 6, "203": [6, 16, 30], "255": [6, 19, 20], "160": 6, "263": [6, 9, 15, 19], "singl": [6, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "32g": 6, "v100": [6, 25, 27, 28, 30], "vari": 6, "word": [7, 25, 27, 28, 30, 34, 38, 39, 41], "error": [7, 9, 10, 15, 18, 19, 20, 30], "rate": [7, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "These": [7, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "alreadi": [7, 15, 16], "But": [7, 18, 33, 35, 36, 45, 46, 47], "long": [7, 18], "true": [7, 9, 15, 16, 18, 19, 20, 25, 27, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "either": [7, 13, 25, 27, 28, 30, 46, 47], "choos": [7, 13, 15, 31, 33, 35, 36, 45, 46, 47], "three": [7, 18, 19, 20, 23, 25, 27, 43], "associ": 7, "dimens": [7, 33, 46, 47], "obviou": 7, "rel": 7, "reduct": [7, 15, 18, 19, 35], "around": 7, "A": [7, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 45, 46, 47], "few": [7, 18, 19, 20, 31], "paramet": [7, 16, 18, 19, 20, 22, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 45, 46, 47], "tune": [7, 18, 19, 20, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "control": [7, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "too": 7, "small": [7, 27, 38, 39, 41], "fulli": 7, "util": [7, 9, 10, 15, 30], "larg": 7, "domin": 7, "bad": 7, "typic": [7, 25, 27, 28, 30], "activ": [7, 13, 15], "path": [7, 9, 13, 15, 16, 18, 19, 20, 23, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "trade": 7, "off": [7, 18], "accuraci": [7, 18, 19, 26], "larger": [7, 19, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "slower": 7, "built": [8, 9, 15], "imag": [8, 14], "cpu": [8, 14, 15, 16, 18, 19, 20, 22, 25, 33, 35, 36, 41, 46, 47], "still": [8, 18, 19, 20], "introduct": [8, 14, 42, 48], "tag": [8, 14], "within": [8, 11, 13, 14, 18, 19], "updat": [8, 18, 19, 20], "host": [9, 16], "hub": 9, "k2fsa": 9, "find": [9, 10, 11, 12, 13, 16, 18, 19, 20, 23, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "dockerfil": 9, "tree": [9, 22, 23, 25, 27, 28, 30, 34, 38, 39, 41, 45], "item": 9, "curl": 9, "registri": 9, "v2": [9, 20, 25, 30], "jq": 9, "give": [9, 27], "someth": [9, 25, 27, 28, 30, 33, 35, 36, 41, 45, 46], "torch2": 9, "cuda11": [9, 10, 15], "torch1": [9, 10, 15], "cuda10": 9, "13": [9, 10, 15, 16, 18, 19, 20, 27, 28, 30, 34, 35, 38], "releas": [9, 15, 16, 18, 19, 20, 25, 27, 30], "torch": [9, 10, 14, 16, 17, 24, 25, 27, 30], "select": [9, 13, 15, 18, 19, 20, 33, 34, 38, 39, 41, 45, 46, 47], "appropri": [9, 15], "combin": [9, 18, 19, 20], "cuda": [9, 10, 14, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 45, 46, 47], "sudo": [9, 25, 28], "rm": 9, "bin": [9, 15, 18, 19, 20, 25, 30], "bash": 9, "start": [9, 13, 15, 16, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "interfac": 9, "present": [9, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "root": [9, 18, 19, 20], "60c947eac59c": 9, "workspac": 9, "current": [9, 13, 18, 19, 27, 31, 43, 45, 46, 47, 48], "user": [9, 10], "copi": [9, 15, 43], "switch": [9, 15, 25, 30, 36], "opt": 9, "conda": [9, 10], "lib": [9, 10, 15, 20], "site": [9, 10, 15, 20], "packag": [9, 10, 15, 20], "__init__": [9, 10, 15, 16, 18, 19, 20, 25, 27, 30], "line": [9, 10, 18, 19, 20, 33, 46, 47], "modul": [9, 14, 18, 20, 35, 46], "_k2": [9, 10, 15], "determinizeweightpushingtyp": [9, 10], "importerror": [9, 14], "libcuda": 9, "cannot": [9, 14, 18, 19, 20], "share": [9, 14, 15], "object": [9, 14, 15, 25, 27, 28, 33, 41, 45, 46], "No": [9, 14, 18, 19, 20, 41], "stub": 9, "list": [9, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39], "16": [9, 15, 16, 18, 19, 20, 23, 25, 27, 28, 30, 33, 34, 38, 39, 41, 45, 46, 47], "second": [9, 25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46, 47], "2023": [9, 15, 18, 19, 20, 35], "01": [9, 15, 18, 27, 28, 30, 31, 35], "02": [9, 15, 16, 18, 19, 20, 27, 30, 33, 39, 45, 46], "06": [9, 15, 16, 18, 28, 30, 34, 41], "info": [9, 15, 16, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39, 41], "264": [9, 15, 20], "posixpath": [9, 15, 18, 19, 20, 27, 30], "lang_dir": [9, 15, 27, 30], "lang_phon": [9, 15, 28, 34, 38, 39, 41], "feature_dim": [9, 15, 16, 18, 19, 20, 25, 27, 30, 41], "search_beam": [9, 15, 25, 30, 41], "20": [9, 15, 16, 18, 20, 25, 27, 28, 30, 33, 34, 38, 39, 41, 46], "output_beam": [9, 15, 25, 30, 41], "min_active_st": [9, 15, 25, 30, 41], "30": [9, 10, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46, 47], "max_active_st": [9, 15, 25, 30, 41], "10000": [9, 15, 25, 30, 41], "use_double_scor": [9, 15, 25, 30, 41], "14": [9, 10, 15, 16, 18, 19, 22, 25, 30, 33, 34, 35, 38, 45, 46, 47], "export": [9, 10, 14, 15, 25, 27, 28, 30, 31, 34, 38, 39, 41], "feature_dir": [9, 15, 30], "fbank": [9, 15, 16, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39, 41], "max_dur": [9, 15, 30], "bucketing_sampl": [9, 15, 30], "num_bucket": [9, 15, 30], "concatenate_cut": [9, 15, 30], "duration_factor": [9, 15, 30], "gap": [9, 15, 30], "on_the_fly_feat": [9, 15, 30], "shuffl": [9, 15, 30], "return_cut": [9, 15, 30], "num_work": [9, 15, 30], "env_info": [9, 15, 16, 18, 19, 20, 25, 27, 30], "sha1": [9, 15, 16, 18, 19, 20, 25, 27, 30], "4c05309499a08454997adf500b56dcc629e35ae5": [9, 15], "date": [9, 15, 16, 18, 19, 20, 25, 27, 30], "tue": [9, 15, 18, 30], "jul": [9, 15], "25": [9, 15, 16, 18, 19, 25, 30, 33, 38, 39, 41, 46], "36": [9, 15, 18, 27, 30, 31], "dev": [9, 10, 15, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "7640d663": 9, "branch": [9, 15, 16, 18, 19, 20, 25, 27, 30, 35], "375520d": 9, "fri": [9, 16], "28": [9, 15, 18, 19, 27, 30, 34], "07": [9, 15, 18, 19, 20, 25, 27, 28, 30], "hostnam": [9, 15, 16, 18, 19, 20, 27], "ip": [9, 15, 16, 18, 19, 20, 27], "172": 9, "17": [9, 15, 16, 18, 19, 20, 25, 30, 38, 39, 45], "401": 9, "lexicon": [9, 15, 25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46, 47], "168": [9, 15, 34], "compil": [9, 15, 18, 19, 25, 27, 30], "linv": [9, 15, 27, 30, 41], "403": [9, 34], "273": [9, 15, 16, 27], "devic": [9, 15, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 46, 47], "406": [9, 30], "291": [9, 15], "424": 9, "218": [9, 15, 19], "about": [9, 15, 18, 19, 20, 27, 31, 33, 36, 45, 46, 47], "cut": [9, 15, 30], "425": [9, 19, 30], "252": [9, 15], "504": 9, "204": [9, 15, 20, 30], "batch": [9, 15, 18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "process": [9, 15, 16, 18, 19, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "until": [9, 15, 30, 35], "w": [9, 15, 30, 38, 39], "nnpack": 9, "cpp": [9, 18, 22], "53": [9, 15, 20, 25, 33, 34, 39, 45, 46], "could": [9, 18, 19, 20, 25, 28], "reason": [9, 16, 18, 19, 20, 46], "unsupport": 9, "hardwar": 9, "687": 9, "241": [9, 15, 25], "transcript": [9, 15, 25, 26, 27, 28, 30, 33, 34, 38, 39, 45, 46, 47], "store": [9, 15, 30], "recog": [9, 15, 27, 30], "test_set": [9, 15, 41], "688": 9, "564": [9, 15], "240": [9, 15, 25, 41], "ins": [9, 15, 30, 41], "del": [9, 15, 30, 41], "sub": [9, 15, 30, 41], "690": 9, "249": [9, 15, 19], "wrote": [9, 15, 30], "detail": [9, 15, 17, 21, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "stat": [9, 15, 30], "err": [9, 15, 27, 30], "316": [9, 15, 30], "congratul": [9, 15, 18, 19, 20, 25, 28, 30, 34, 38, 39, 41], "finish": [9, 25, 27, 28, 30, 31, 33, 34, 38, 39, 41, 46, 47], "successfulli": [9, 15, 18, 19, 20], "collect": [10, 15], "post": 10, "correspond": [10, 12, 13], "solut": 10, "One": 10, "torchaudio": [10, 14, 43], "cu111": 10, "torchvis": 10, "f": [10, 15, 38, 39], "org": [10, 15, 26, 27, 33, 45, 46, 47], "whl": [10, 15], "torch_stabl": [10, 15], "throw": [10, 18, 19, 20], "while": [10, 15, 18, 19, 20, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "That": [10, 18, 19, 31, 33, 45, 46, 47], "cu11": 10, "correct": 10, "traceback": 10, "most": [10, 46, 47], "recent": [10, 18, 19, 20], "last": 10, "yesnoasrdatamodul": 10, "home": [10, 18, 19, 25, 30], "xxx": [10, 16, 18, 19, 20], "next": [10, 13, 15, 18, 19, 20, 30, 31, 33, 34, 35, 36, 45, 46, 47], "gen": [10, 13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "kaldi": [10, 13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "34": [10, 18, 19], "datamodul": 10, "add_eo": 10, "add_so": 10, "get_text": 10, "39": [10, 15, 18, 20, 27, 30, 34, 38], "tensorboard": [10, 15, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "summarywrit": 10, "miniconda3": 10, "env": 10, "yyi": 10, "loosevers": 10, "uninstal": 10, "setuptool": [10, 15], "yangyifan": 10, "anaconda3": 10, "dev20230112": 10, "py3": [10, 15], "linux": [10, 13, 15, 17, 18, 19, 20, 21], "x86_64": [10, 15, 18], "egg": 10, "handl": [10, 25, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "except": [10, 16], "anoth": 10, "occur": 10, "pruned_transducer_stateless7_ctc_b": [10, 35], "104": [10, 15], "rais": 10, "anaconda": 10, "maco": [10, 13, 17, 18, 19, 20, 21], "probabl": [10, 27, 33, 35, 45, 46, 47], "variabl": [10, 15, 18, 19, 20, 25, 28, 30, 31, 33, 35, 36, 45, 46, 47], "dyld_library_path": 10, "conda_prefix": 10, "locat": [10, 18], "libpython": 10, "abl": 10, "insid": [10, 23], "codna_prefix": 10, "ld_library_path": 10, "anyth": [11, 13], "youtub": [11, 14, 30, 31, 33, 34, 35, 36, 45, 46, 47], "video": [11, 14, 30, 31, 33, 34, 35, 36, 45, 46, 47], "upload": [12, 13, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "visit": [12, 13, 33, 35, 36, 45, 46, 47], "specif": [12, 21, 27], "aishel": [12, 14, 25, 27, 28, 29, 48], "wenetspeech": [12, 22], "framework": [13, 33, 46], "sherpa": [13, 17, 22, 23, 24, 45], "window": [13, 17, 18, 19, 20, 21], "ipad": 13, "phone": 13, "recognit": [13, 14, 17, 18, 19, 26, 27, 41, 48], "screenshot": [13, 25, 27, 28, 30, 31, 33, 41, 45, 46], "chines": [13, 26, 27], "english": [13, 41, 45], "greedi": 13, "record": [13, 19, 20, 25, 26, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "click": [13, 15, 25, 27, 28, 30, 33, 35, 36, 41, 45, 46], "button": 13, "submit": 13, "wait": 13, "moment": 13, "bottom": [13, 33, 35, 36, 45, 46, 47], "part": [13, 15, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "one": [13, 16, 18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "subscrib": [13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "channel": [13, 15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "nadira": [13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "povei": [13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "www": [13, 15, 26, 30, 31, 33, 34, 35, 36, 45, 46, 47], "uc_vaumpkminz1pnkfxan9mw": [13, 15, 30, 31, 33, 34, 35, 36, 45, 46, 47], "toolkit": 14, "cudnn": 14, "docker": [14, 15], "frequent": 14, "ask": 14, "question": 14, "faq": 14, "oserror": 14, "libtorch_hip": 14, "attributeerror": 14, "distutil": 14, "attribut": [14, 20, 30], "libpython3": 14, "state_dict": [14, 24, 25, 27, 28, 30, 34, 38, 39, 41], "jit": [14, 17, 24, 30], "trace": [14, 17, 22, 24], "onnx": [14, 16, 24], "ncnn": [14, 24], "non": [14, 30, 43, 46, 48], "timit": [14, 29, 38, 39, 48], "contribut": 14, "support": [15, 17, 18, 19, 20, 25, 27, 30, 33, 35, 36, 43, 45, 46, 47], "setup": [15, 18, 25, 27, 28, 30, 31, 33, 34, 38, 39, 41, 46, 47], "guid": 15, "matter": [15, 18], "don": [15, 18, 19, 20, 22, 25, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "suggest": [15, 33, 35, 36, 45, 46, 47], "wheel": [15, 18], "from_wheel": 15, "alwai": [15, 16], "strongli": 15, "pythonpath": [15, 18, 19, 20], "point": [15, 16, 25, 28, 30, 31, 33, 35, 36, 45, 46, 47], "folder": [15, 16, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "tmp": [15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "put": [15, 18, 19, 35, 46], "sever": [15, 16, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "just": [15, 18, 19, 20, 43], "kuangfangjun": [15, 18, 19, 20], "virtualenv": 15, "cpython3": 15, "final": [15, 16, 18, 19, 30, 34], "64": [15, 16, 18, 27, 46], "9422m": 15, "creator": 15, "cpython3posix": 15, "dest": 15, "star": [15, 18, 19, 20], "fj": [15, 16, 18, 19, 20, 27, 30], "fangjun": [15, 16, 18, 19, 20, 27, 30], "clear": 15, "no_vcs_ignor": 15, "global": 15, "seeder": 15, "fromappdata": 15, "bundl": 15, "app_data_dir": 15, "ad": [15, 18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 41, 43, 45, 46, 47], "seed": 15, "bashactiv": 15, "cshellactiv": 15, "fishactiv": 15, "nushellactiv": 15, "powershellactiv": 15, "pythonactiv": 15, "determin": 15, "nvidia": [15, 25, 27, 28, 30], "smi": 15, "head": [15, 27, 43], "wed": [15, 18, 25, 27, 30], "26": [15, 18, 19, 20, 27, 30, 39], "21": [15, 16, 18, 25, 27, 30, 38, 39], "49": [15, 18, 19, 30, 39, 41], "510": 15, "47": [15, 18, 19, 20, 25, 30], "03": [15, 16, 19, 27, 30, 38, 39, 45], "driver": 15, "greater": 15, "our": [15, 18, 19, 20, 22, 23, 30, 31, 33, 43, 46, 47], "case": [15, 16, 18, 19, 20, 33, 35, 36, 45, 46, 47], "verifi": 15, "nvcc": 15, "copyright": 15, "c": [15, 27, 28, 33, 35, 36, 41, 45, 46, 47], "2005": 15, "2019": 15, "corpor": 15, "wed_oct_23_19": 15, "38_pdt_2019": 15, "v10": 15, "89": [15, 25], "cu116": 15, "compat": 15, "audio": [15, 38, 39], "stabl": 15, "matrix": 15, "2bcu116": 15, "cp38": 15, "linux_x86_64": 15, "1983": 15, "mb": [15, 18, 19, 20], "________________________________________": 15, "gb": [15, 27], "764": 15, "kb": [15, 18, 19, 20, 38, 39], "eta": 15, "00": [15, 18, 25, 27, 28, 30, 34, 38, 39, 41], "satisfi": 15, "extens": 15, "__version__": 15, "dev20230725": 15, "pypi": 15, "tuna": 15, "tsinghua": 15, "edu": 15, "cn": 15, "csukuangfj": [15, 16, 18, 19, 21, 25, 27, 28, 30, 34, 38, 39, 41, 45], "resolv": 15, "main": [15, 25, 30, 43], "ubuntu": [15, 18, 19, 20], "2bcuda11": 15, "manylinux_2_17_x86_64": 15, "manylinux2014_x86_64": 15, "graphviz": 15, "cach": [15, 20], "de": [15, 16, 18, 19, 20, 27], "5e": 15, "fcbb22c68208d39edff467809d06c9d81d7d27426460ebc598e55130c1aa": 15, "none": [15, 25, 30], "o": 15, "cento": 15, "2009": 15, "core": 15, "cmake": [15, 18, 19, 25, 30], "27": [15, 18, 19, 20, 25, 27, 34, 39], "gcc": 15, "cmake_cuda_flag": 15, "wno": 15, "deprec": [15, 27], "lineinfo": 15, "expt": 15, "extend": 15, "lambda": 15, "use_fast_math": 15, "xptxa": 15, "gencod": 15, "arch": 15, "compute_35": 15, "sm_35": 15, "compute_50": 15, "sm_50": 15, "compute_60": 15, "sm_60": 15, "compute_61": 15, "sm_61": 15, "compute_70": 15, "sm_70": 15, "compute_75": 15, "sm_75": 15, "compute_80": 15, "sm_80": 15, "compute_86": 15, "sm_86": 15, "donnx_namespac": 15, "onnx_c2": 15, "compute_52": 15, "sm_52": 15, "xcudaf": 15, "diag_suppress": 15, "cc_clobber_ignor": 15, "integer_sign_chang": 15, "useless_using_declar": 15, "set_but_not_us": 15, "field_without_dll_interfac": 15, "base_class_has_different_dll_interfac": 15, "dll_interface_conflict_none_assum": 15, "dll_interface_conflict_dllexport_assum": 15, "implicit_return_from_non_void_funct": 15, "unsigned_compare_with_zero": 15, "declared_but_not_referenc": 15, "bad_friend_decl": 15, "relax": 15, "constexpr": 15, "d_glibcxx_use_cxx11_abi": 15, "option": [15, 17, 21, 24, 27, 31, 34, 38, 39, 41], "wall": 15, "strict": [15, 20, 26], "overflow": 15, "unknown": 15, "pragma": 15, "cmake_cxx_flag": 15, "unus": 15, "nvtx": 15, "enabl": [15, 31], "disabl": [15, 16, 18, 19], "debug": 15, "sync": 15, "kernel": [15, 18, 20, 27], "memori": [15, 18, 25, 27, 30, 43], "alloc": 15, "214748364800": 15, "byte": [15, 18, 19, 20], "200": [15, 16, 18, 19, 20, 25, 30, 31, 38, 39, 41], "abort": 15, "__file__": 15, "cpython": [15, 18], "gnu": [15, 18], "req": 15, "vq12fd5i": 15, "filter": 15, "quiet": [15, 26], "7640d663469b22cd0b36f3246ee9b849cd25e3b7": 15, "metadata": [15, 38, 39], "pyproject": 15, "toml": 15, "cytoolz": 15, "1e": 15, "3b": 15, "a7828d575aa17fb7acaf1ced49a3655aa36dad7e16eb7e6a2e4df0dda76f": 15, "33": [15, 18, 19, 25, 26, 27, 30, 38], "pyyaml": 15, "c8": 15, "6b": 15, "6600ac24725c7388255b2f5add93f91e58a5d7efaf4af244fdbcc11a541b": 15, "ma": 15, "nylinux_2_17_x86_64": 15, "736": 15, "dataclass": 15, "2f": 15, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 15, "dev0": 15, "7640d66": 15, "lilcom": 15, "a8": 15, "df0a69c52bd085ca1ad4e5c4c1a5c680e25f9477d8e49316c4ff1e5084a4": 15, "linux_2_17_x86_64": 15, "87": [15, 18], "tqdm": 15, "e6": 15, "a2cff6306177ae6bc73bc0665065de51dfb3b9db7373e122e2735faf0d97": 15, "numpi": 15, "18": [15, 18, 19, 20, 25, 27, 28, 30, 33, 34, 38, 39, 45, 46, 47], "audioread": 15, "5d": 15, "cb": 15, "82a002441902dccbe427406785db07af10182245ee639ea9f4d92907c923": 15, "tar": 15, "gz": 15, "377": 15, "tabul": 15, "40": [15, 18, 19, 20, 28, 30, 34, 38, 39], "44": [15, 18, 19, 30, 38, 39], "4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854": 15, "35": [15, 16, 18, 19, 20, 27, 30, 45], "1a": 15, "70": 15, "e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f": 15, "97": [15, 18, 25], "ab": [15, 33, 45, 46, 47], "c3": 15, "57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121": 15, "48": [15, 18, 19, 25, 27], "intervaltre": 15, "50": [15, 16, 18, 19, 20, 30, 33, 38, 45, 46, 47], "fb": 15, "396d568039d21344639db96d940d40eb62befe704ef849b27949ded5c3bb": 15, "soundfil": 15, "bd": 15, "0602167a213d9184fc688b1086dc6d374b7ae8c33eccf169f9b50ce6568c": 15, "py2": 15, "46": [15, 19, 25, 30], "toolz": 15, "7f": 15, "5c": 15, "922a3508f5bda2892be3df86c74f9cf1e01217c2b1f8a0ac4841d903e3e9": 15, "55": [15, 18, 28, 30, 38], "sortedcontain": 15, "9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621c": 15, "cffi": 15, "b7": 15, "8b": 15, "06f30caa03b5b3ac006de4f93478dbd0239e2a16566d81a106c322dc4f79": 15, "15": [15, 16, 18, 19, 20, 27, 28, 30, 38, 41], "442": 15, "pycpars": 15, "d5": 15, "5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53": 15, "118": [15, 30], "filenam": [15, 18, 19, 20, 21, 22, 23, 35, 36, 45, 47], "size": [15, 16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "687627": 15, "sha256": 15, "cbf0a4d2d0b639b33b91637a4175bc251d6a021a069644ecb1a9f2b3a83d072a": 15, "ephem": 15, "wwtk90_m": 15, "7a": 15, "8e": 15, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 15, "23704": 15, "5e2d3537c96ce9cf0f645a654c671163707bf8cb8d9e358d0e2b0939a85ff4c2": 15, "9c": 15, "f19ae5a03f8862d9f0776b0c0570f1fdd60a119d90954e3f39": 15, "26098": 15, "2604170976cfffe0d2f678cb1a6e5b525f561cd50babe53d631a186734fec9f9": 15, "f3": 15, "ed": 15, "2b": 15, "c179ebfad4e15452d6baef59737f27beb9bfb442e0620f7271": 15, "remot": 15, "enumer": 15, "12942": 15, "count": 15, "100": [15, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "compress": 15, "56": [15, 18, 19, 30, 38], "total": [15, 19, 20, 25, 27, 28, 30, 31, 33, 34, 41, 45, 46], "delta": 15, "reus": 15, "pack": [15, 46, 47], "12875": 15, "receiv": 15, "mib": 15, "8835": 15, "41": [15, 18, 20, 25, 27, 38, 41], "dl_dir": [15, 25, 28, 30, 31, 33, 35, 36, 45, 46, 47], "waves_yesno": 15, "___________________________________________________": 15, "70m": 15, "1mb": 15, "manifest": [15, 31], "718": 15, "compute_fbank_yesno": 15, "extract": [15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "featur": [15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "_______________________________________________________________________________": 15, "90": [15, 18], "82it": 15, "778": 15, "______________________________________________________________________________": 15, "256": [15, 20, 38, 39], "92it": 15, "51": [15, 18, 25, 30, 41], "lang": [15, 16, 27, 30, 36], "66": [15, 19], "project": 15, "kaldilm": 15, "csrc": [15, 30], "arpa_file_pars": 15, "cc": 15, "void": 15, "arpafilepars": 15, "std": 15, "istream": 15, "79": 15, "140": [15, 28], "92": [15, 30], "hlg": [15, 34, 38, 39, 41], "275": [15, 25], "compile_hlg": 15, "124": [15, 25, 30], "276": 15, "171": [15, 28, 30, 38, 39], "convert": [15, 18, 19, 20, 30], "l": [15, 18, 19, 20, 27, 38, 39, 41], "309": 15, "ctc_topo": 15, "max_token_id": 15, "310": 15, "314": 15, "intersect": [15, 33, 46, 47], "323": 15, "lg": [15, 33, 36, 46, 47], "shape": [15, 20], "connect": [15, 16, 30, 33, 34, 45, 46, 47], "68": [15, 30], "class": [15, 30], "tensor": [15, 19, 20, 25, 27, 28, 30, 33, 41, 45, 46], "71": [15, 30, 34], "341": 15, "rag": 15, "raggedtensor": 15, "76": [15, 41], "remov": [15, 25, 27, 28, 30, 34, 38, 39], "disambigu": 15, "354": 15, "91": 15, "remove_epsilon": 15, "445": 15, "arc": 15, "compos": 15, "h": 15, "446": 15, "106": [15, 19, 30], "109": [15, 25, 30], "447": 15, "111": [15, 30], "127": [15, 18, 19, 41], "cuda_visible_devic": [15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "segment": 15, "fault": 15, "dump": 15, "protocol_buffers_python_implement": 15, "674": 15, "interest": [15, 31, 33, 35, 36, 45, 46, 47], "936": 15, "481": 15, "482": 15, "lr": [15, 27, 45], "weight_decai": 15, "start_epoch": 15, "best_train_loss": [15, 16, 18, 19, 20], "inf": [15, 16, 18, 19, 20], "best_valid_loss": [15, 16, 18, 19, 20], "best_train_epoch": [15, 16, 18, 19, 20], "best_valid_epoch": [15, 16, 19, 20], "batch_idx_train": [15, 16, 18, 19, 20], "log_interv": [15, 16, 18, 19, 20], "reset_interv": [15, 16, 18, 19, 20], "valid_interv": [15, 16, 18, 19, 20], "beam_siz": [15, 16, 27], "sum": 15, "world_siz": [15, 31], "master_port": 15, "12354": 15, "num_epoch": 15, "3fb0a43": 15, "thu": [15, 16, 18, 19, 20, 27, 30, 34], "05": [15, 16, 18, 19, 25, 27, 28, 30, 39], "74279": [15, 16, 18, 19, 20, 27], "1220091118": 15, "57c4d55446": 15, "sph26": 15, "941": 15, "949": 15, "495": 15, "965": [15, 25], "146": 15, "244": 15, "967": 15, "149": [15, 18, 30], "199": [15, 30, 34], "singlecutsampl": 15, "205": [15, 30], "968": 15, "565": [15, 30], "422": 15, "loss": [15, 18, 19, 25, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "065": 15, "over": [15, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "2436": 15, "frame": [15, 27, 33, 35, 46, 47], "tot_loss": 15, "681": [15, 18], "4561": 15, "2828": 15, "7076": 15, "22192": 15, "54": [15, 19, 20, 30, 34, 38, 39], "167": 15, "444": 15, "9002": 15, "18067": 15, "011": 15, "2555": 15, "2695": 15, "484": 15, "34971": 15, "331": [15, 18, 19, 30, 34], "4688": 15, "368": 15, "75": [15, 18], "633": 15, "2532": 15, "242": [15, 25, 30], "1139": 15, "1592": 15, "522": [15, 30], "1627": 15, "209": [15, 34], "07055": 15, "1175": 15, "07091": 15, "640": [15, 20], "847": 15, "07731": 15, "427": [15, 19, 30], "04391": 15, "05341": 15, "884": 15, "04384": 15, "387": [15, 39], "03458": 15, "04616": 15, "707": [15, 25, 30], "03379": 15, "758": [15, 30], "433": [15, 30], "01054": 15, "980": [15, 30], "009014": 15, "009974": 15, "489": [15, 25], "01085": 15, "258": [15, 38, 39], "01172": 15, "01055": 15, "621": [15, 41], "01074": 15, "699": 15, "866": 15, "01044": 15, "844": 15, "008942": 15, "221": [15, 30], "01082": 15, "970": [15, 30], "01169": 15, "247": 15, "01073": 15, "326": [15, 19], "555": 15, "840": 15, "841": 15, "855": 15, "868": 15, "882": 15, "883": 15, "157": 15, "701": 15, "702": [15, 30], "704": [15, 25, 38], "fun": [15, 18, 19], "variou": [15, 21, 24, 48], "period": [16, 18], "disk": 16, "optim": [16, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "relat": [16, 25, 27, 30, 34, 38, 39, 41], "resum": [16, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "strip": 16, "reduc": [16, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "each": [16, 18, 19, 21, 25, 27, 28, 30, 33, 35, 36, 43, 45, 46, 47], "well": [16, 41, 48], "usag": [16, 18, 19, 20, 22, 23, 34, 38, 39, 41], "pruned_transducer_stateless3": [16, 22, 43], "almost": [16, 33, 43, 46, 47], "dict": [16, 20], "stateless3": [16, 18], "repo": [16, 21], "prefix": 16, "those": 16, "wave": [16, 18, 19, 20, 25, 30], "iter": [16, 18, 19, 20, 23, 33, 35, 36, 45, 46, 47], "1224000": 16, "greedy_search": [16, 27, 33, 35, 45, 46, 47], "test_wav": [16, 18, 19, 20, 21, 25, 27, 28, 30, 34, 38, 39, 41], "1089": [16, 18, 19, 20, 21, 30, 34], "134686": [16, 18, 19, 20, 21, 30, 34], "0001": [16, 18, 19, 20, 21, 30, 34], "wav": [16, 18, 19, 20, 21, 23, 25, 27, 28, 30, 33, 35, 36, 38, 39, 41, 45, 46, 47], "1221": [16, 18, 19, 30, 34], "135766": [16, 18, 19, 30, 34], "0002": [16, 18, 19, 30, 34], "multipl": [16, 25, 27, 28, 30, 34, 38, 39, 41], "sound": [16, 18, 19, 20, 23, 24, 25, 27, 28, 30, 34, 38, 39, 41], "Its": [16, 18, 19, 20, 30], "output": [16, 18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "19": [16, 18, 19, 20, 25, 30, 34, 38, 39], "09": [16, 19, 25, 27, 28, 30, 45], "233": [16, 18, 19], "265": 16, "3000": [16, 18, 19, 20], "80": [16, 18, 19, 20, 25, 27, 30], "subsampling_factor": [16, 19, 20, 25, 27, 30], "encoder_dim": [16, 18, 19, 20], "512": [16, 18, 19, 20, 25, 27, 30], "nhead": [16, 18, 20, 25, 27, 30, 33, 46], "dim_feedforward": [16, 18, 19, 27], "num_encoder_lay": [16, 18, 19, 20, 27], "decoder_dim": [16, 18, 19, 20], "joiner_dim": [16, 18, 19, 20], "model_warm_step": [16, 18, 19], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 16, "oct": [16, 30], "miss": [16, 18, 19, 20, 27, 30], "cu102": [16, 18, 19, 20], "1013": 16, "c39cba5": 16, "dirti": [16, 18, 19, 25, 30], "ceph": [16, 25, 27, 30], "jsonl": 16, "0324160024": 16, "65bfd8b584": 16, "jjlbn": 16, "bpe_model": [16, 18, 19, 20, 30], "sound_fil": [16, 25, 27, 30, 41], "sample_r": [16, 25, 27, 30, 41], "16000": [16, 25, 27, 28, 30, 34, 35, 38, 39], "max_context": 16, "max_stat": 16, "context_s": [16, 18, 19, 20, 27], "max_sym_per_fram": [16, 27], "simulate_stream": 16, "decode_chunk_s": 16, "left_context": 16, "dynamic_chunk_train": 16, "causal_convolut": 16, "short_chunk_s": [16, 20, 46, 47], "num_left_chunk": [16, 20], "blank_id": [16, 18, 19, 20, 27], "unk_id": 16, "vocab_s": [16, 18, 19, 20, 27], "271": [16, 19], "612": 16, "458": 16, "giga": [16, 19, 45], "623": 16, "277": 16, "78648040": 16, "951": [16, 30], "285": [16, 27, 30], "construct": [16, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39, 41], "952": 16, "295": [16, 25, 27, 28, 30], "957": 16, "301": [16, 30], "700": 16, "329": [16, 19, 30], "912": 16, "388": 16, "earli": [16, 18, 19, 20, 30, 34], "nightfal": [16, 18, 19, 20, 30, 34], "THE": [16, 18, 19, 20, 30, 34], "yellow": [16, 18, 19, 20, 30, 34], "lamp": [16, 18, 19, 20, 30, 34], "light": [16, 18, 19, 20, 30, 34], "AND": [16, 18, 19, 20, 30, 34], "THERE": [16, 18, 19, 20, 30, 34], "squalid": [16, 18, 19, 20, 30, 34], "quarter": [16, 18, 19, 20, 30, 34], "OF": [16, 18, 19, 20, 30, 34], "brothel": [16, 18, 19, 20, 30, 34], "god": [16, 30, 34], "AS": [16, 30, 34], "direct": [16, 30, 34], "consequ": [16, 30, 34], "sin": [16, 30, 34], "man": [16, 30, 34], "punish": [16, 30, 34], "had": [16, 30, 34], "her": [16, 30, 34], "love": [16, 30, 34], "child": [16, 30, 34], "whose": [16, 27, 30, 34], "ON": [16, 18, 30, 34], "THAT": [16, 30, 34], "dishonor": [16, 30, 34], "bosom": [16, 30, 34], "TO": [16, 30, 34], "parent": [16, 30, 34], "forev": [16, 30, 34], "WITH": [16, 30, 34], "race": [16, 30, 34], "descent": [16, 30, 34], "mortal": [16, 30, 34], "BE": [16, 30, 34], "bless": [16, 30, 34], "soul": [16, 30, 34], "IN": [16, 30, 34], "heaven": [16, 30, 34], "yet": [16, 18, 19, 30, 34], "THESE": [16, 30, 34], "thought": [16, 30, 34], "affect": [16, 30, 34], "hester": [16, 30, 34], "prynn": [16, 30, 34], "hope": [16, 26, 30, 34], "apprehens": [16, 30, 34], "390": 16, "down": [16, 25, 30, 33, 35, 36, 45, 46, 47], "reproduc": [16, 30], "9999": [16, 35, 36, 45], "symlink": 16, "pass": [16, 20, 25, 27, 28, 30, 33, 35, 36, 43, 45, 46, 47], "zipform": [17, 21, 24, 29, 32, 42, 44], "convemform": [17, 24, 43], "platform": [17, 21], "android": [17, 18, 19, 20, 21], "raspberri": [17, 21], "pi": [17, 21], "\u7231\u82af\u6d3e": 17, "maix": 17, "iii": 17, "axera": 17, "rv1126": 17, "static": 17, "produc": [17, 33, 35, 36, 45, 46, 47], "binari": [17, 18, 19, 20, 25, 27, 28, 30, 33, 41, 45, 46], "everyth": 17, "pnnx": [17, 24], "torchscript": [17, 22, 23, 24], "encod": [17, 21, 23, 24, 25, 27, 28, 30, 33, 34, 35, 41, 43, 45, 46, 47], "int8": [17, 24], "quantiz": [17, 24, 31], "conv": [18, 19], "emform": [18, 19, 22], "stateless2": [18, 19, 45], "pretrained_model": [18, 19, 20], "online_transduc": 18, "continu": [18, 19, 20, 21, 25, 27, 28, 30, 33, 35, 36, 41, 45, 46], "jit_xxx": [18, 19, 20], "anywher": [18, 19], "submodul": 18, "recurs": 18, "init": 18, "dcmake_build_typ": [18, 25, 30], "dncnn_python": 18, "dncnn_build_benchmark": 18, "dncnn_build_exampl": 18, "dncnn_build_tool": 18, "j4": 18, "pwd": 18, "src": [18, 20], "compon": [18, 43], "ncnn2int8": [18, 19], "am": 18, "sai": [18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "later": [18, 19, 20, 25, 28, 30, 33, 34, 35, 36, 38, 39, 45, 46, 47], "termin": 18, "tencent": [18, 19], "modif": [18, 27], "offic": 18, "synchron": 18, "offici": 18, "renam": [18, 19, 20], "conv_emformer_transducer_stateless2": [18, 43], "length": [18, 20, 27, 46, 47], "cnn": [18, 20], "31": [18, 19, 20, 30], "context": [18, 27, 33, 43, 45, 46, 47], "configur": [18, 20, 27, 31, 34, 38, 39, 41], "accordingli": [18, 19, 20], "yourself": [18, 19, 20, 31, 46, 47], "677": 18, "220": [18, 27, 28, 30], "229": [18, 25], "best_v": 18, "alid_epoch": 18, "subsampl": [18, 46, 47], "ing_factor": 18, "a34171ed85605b0926eebbd0463d059431f4f74a": 18, "dec": 18, "ver": 18, "ion": 18, "530e8a1": 18, "op": 18, "1220120619": [18, 19, 20], "7695ff496b": [18, 19, 20], "s9n4w": [18, 19, 20], "icefa": 18, "ll": 18, "transdu": 18, "cer": 18, "use_averaged_model": [18, 19, 20], "cnn_module_kernel": [18, 20], "left_context_length": 18, "chunk_length": 18, "right_context_length": 18, "memory_s": 18, "231": [18, 19, 20], "053": 18, "112": [18, 19, 20], "022": 18, "708": [18, 25, 27, 30, 41], "315": [18, 25, 27, 28, 30, 34], "75490012": 18, "318": [18, 19], "320": [18, 27], "682": 18, "lh": [18, 19, 20], "rw": [18, 19, 20], "289m": 18, "jan": [18, 19, 20], "289": 18, "roughli": [18, 19, 20], "equal": [18, 19, 20, 46, 47], "1024": [18, 19, 20, 45], "287": [18, 41], "1010k": [18, 19], "decoder_jit_trac": [18, 19, 20, 23, 45, 47], "283m": 18, "encoder_jit_trac": [18, 19, 20, 23, 45, 47], "0m": [18, 19], "joiner_jit_trac": [18, 19, 20, 23, 45, 47], "sure": [18, 19, 20], "found": [18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 41, 45, 46], "param": [18, 19, 20], "503k": [18, 19], "437": [18, 19, 20], "142m": 18, "79k": 18, "5m": [18, 19], "architectur": [18, 19, 20, 45], "editor": [18, 19, 20], "content": [18, 19, 20], "283": [18, 20], "1010": [18, 19], "142": [18, 25, 28, 30], "503": [18, 19], "convers": [18, 19, 20], "half": [18, 19, 20, 33, 46, 47], "v": [18, 19, 20, 30, 38, 39], "default": [18, 19, 20, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "float32": [18, 19, 20], "float16": [18, 19, 20], "occupi": [18, 19, 20], "twice": [18, 19, 20], "smaller": [18, 19, 20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "fp16": [18, 19, 20, 33, 35, 36, 45, 46, 47], "won": [18, 19, 20, 21, 25, 28, 30, 31, 33, 35, 36, 45, 46, 47], "accept": [18, 19, 20], "216": [18, 25, 30, 38, 39], "encoder_param_filenam": [18, 19, 20], "encoder_bin_filenam": [18, 19, 20], "decoder_param_filenam": [18, 19, 20], "decoder_bin_filenam": [18, 19, 20], "joiner_param_filenam": [18, 19, 20], "joiner_bin_filenam": [18, 19, 20], "sound_filenam": [18, 19, 20], "141": 18, "328": 18, "151": 18, "176": [18, 27, 30], "336": 18, "106000": [18, 19, 20, 30, 34], "581": [18, 34], "381": 18, "7767517": [18, 19, 20], "1060": 18, "1342": 18, "in0": [18, 19, 20], "explan": [18, 19, 20], "magic": [18, 19, 20], "intermedi": [18, 19, 20], "mean": [18, 19, 20, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 43, 45, 46, 47], "increment": [18, 19, 20], "1061": 18, "sherpametadata": [18, 19, 20], "sherpa_meta_data1": [18, 19, 20], "newli": [18, 19, 20], "must": [18, 19, 20, 46], "kei": [18, 19, 20, 30], "eas": [18, 19, 20], "pair": [18, 19, 20], "sad": [18, 19, 20], "rememb": [18, 19, 20], "anymor": [18, 19, 20], "flexibl": [18, 19, 20], "edit": [18, 19, 20], "arm": [18, 19, 20], "aarch64": [18, 19, 20], "onc": [18, 19], "mayb": [18, 19], "year": [18, 19], "_jit_trac": [18, 19], "fp32": [18, 19], "doubl": [18, 19], "j": [18, 19, 25, 30], "py38": [18, 19, 20], "arg": [18, 19], "wave_filenam": [18, 19], "16k": [18, 19], "hz": [18, 19, 38, 39], "mono": [18, 19], "calibr": [18, 19], "cat": [18, 19], "eof": [18, 19], "calcul": [18, 19, 35, 46, 47], "has_gpu": [18, 19], "config": [18, 19], "use_vulkan_comput": [18, 19], "88": [18, 27], "conv_87": 18, "942385": [18, 19], "threshold": [18, 19, 35], "938493": 18, "968131": 18, "conv_88": 18, "442448": 18, "549335": 18, "167552": 18, "conv_89": 18, "228289": 18, "001738": 18, "871552": 18, "linear_90": 18, "976146": 18, "101789": 18, "115": [18, 19, 25, 30], "267128": 18, "linear_91": 18, "962030": 18, "162033": 18, "602713": 18, "linear_92": 18, "323041": 18, "853959": 18, "953129": 18, "linear_94": 18, "905416": 18, "648006": 18, "323545": 18, "linear_93": 18, "474093": 18, "200188": 18, "linear_95": 18, "888012": 18, "403563": 18, "483986": 18, "linear_96": 18, "856741": 18, "398679": 18, "524273": 18, "linear_97": 18, "635942": 18, "613655": 18, "590950": 18, "linear_98": 18, "460340": 18, "670146": 18, "398010": 18, "linear_99": 18, "532276": 18, "585537": 18, "119396": 18, "linear_101": 18, "585871": 18, "719224": 18, "205809": 18, "linear_100": 18, "751382": 18, "081648": 18, "linear_102": 18, "593344": 18, "450581": 18, "551147": 18, "linear_103": 18, "592681": 18, "705824": 18, "257959": 18, "linear_104": 18, "752957": 18, "980955": 18, "110489": 18, "linear_105": 18, "696240": 18, "877193": 18, "608953": 18, "linear_106": 18, "059659": 18, "643138": 18, "048950": 18, "linear_108": 18, "975461": 18, "589567": 18, "671457": 18, "linear_107": 18, "190381": 18, "515701": 18, "linear_109": 18, "710759": 18, "305635": 18, "082436": 18, "linear_110": 18, "531228": 18, "731162": 18, "159557": 18, "linear_111": 18, "528083": 18, "259322": 18, "211544": 18, "linear_112": 18, "148807": 18, "500842": 18, "087374": 18, "linear_113": 18, "592566": 18, "948851": 18, "166611": 18, "linear_115": 18, "437109": 18, "608947": 18, "642395": 18, "linear_114": 18, "193942": 18, "503904": 18, "linear_116": 18, "966980": 18, "200896": 18, "676392": 18, "linear_117": 18, "451303": 18, "061664": 18, "951344": 18, "linear_118": 18, "077262": 18, "965800": 18, "023804": 18, "linear_119": 18, "671615": 18, "847613": 18, "198460": 18, "linear_120": 18, "625638": 18, "131427": 18, "556595": 18, "linear_122": 18, "274080": 18, "888716": 18, "978189": 18, "linear_121": 18, "420480": 18, "429659": 18, "linear_123": 18, "826197": 18, "599617": 18, "281532": 18, "linear_124": 18, "396383": 18, "325849": 18, "335875": 18, "linear_125": 18, "337198": 18, "941410": 18, "221970": 18, "linear_126": 18, "699965": 18, "842878": 18, "224073": 18, "linear_127": 18, "775370": 18, "884215": 18, "696438": 18, "linear_129": 18, "872276": 18, "837319": 18, "254213": 18, "linear_128": 18, "180057": 18, "687883": 18, "linear_130": 18, "150427": 18, "454298": 18, "765789": 18, "linear_131": 18, "112692": 18, "924847": 18, "025545": 18, "linear_132": 18, "852893": 18, "116593": 18, "749626": 18, "linear_133": 18, "517084": 18, "024665": 18, "275314": 18, "linear_134": 18, "683807": 18, "878618": 18, "743618": 18, "linear_136": 18, "421055": 18, "322729": 18, "086264": 18, "linear_135": 18, "309880": 18, "917679": 18, "linear_137": 18, "827781": 18, "744595": 18, "915554": 18, "linear_138": 18, "422395": 18, "742882": 18, "402161": 18, "linear_139": 18, "527538": 18, "866123": 18, "849449": 18, "linear_140": 18, "128619": 18, "657793": 18, "266134": 18, "linear_141": 18, "839593": 18, "845993": 18, "021378": 18, "linear_143": 18, "442304": 18, "099039": 18, "889746": 18, "linear_142": 18, "325038": 18, "849592": 18, "linear_144": 18, "929444": 18, "618206": 18, "605080": 18, "linear_145": 18, "382126": 18, "321095": 18, "625010": 18, "linear_146": 18, "894987": 18, "867645": 18, "836517": 18, "linear_147": 18, "915313": 18, "906028": 18, "886522": 18, "linear_148": 18, "614287": 18, "908151": 18, "496181": 18, "linear_150": 18, "724932": 18, "485588": 18, "312899": 18, "linear_149": 18, "161146": 18, "606939": 18, "linear_151": 18, "164453": 18, "847355": 18, "719223": 18, "linear_152": 18, "086471": 18, "984121": 18, "222834": 18, "linear_153": 18, "099524": 18, "991601": 18, "816805": 18, "linear_154": 18, "054585": 18, "489706": 18, "286930": 18, "linear_155": 18, "389185": 18, "100321": 18, "963501": 18, "linear_157": 18, "982999": 18, "154796": 18, "637253": 18, "linear_156": 18, "537706": 18, "875190": 18, "linear_158": 18, "420287": 18, "502287": 18, "531588": 18, "linear_159": 18, "014746": 18, "423280": 18, "477261": 18, "linear_160": 18, "633553": 18, "715335": 18, "220921": 18, "linear_161": 18, "371849": 18, "117830": 18, "815203": 18, "linear_162": 18, "492933": 18, "126283": 18, "623318": 18, "linear_164": 18, "697504": 18, "825712": 18, "317358": 18, "linear_163": 18, "078367": 18, "008038": 18, "linear_165": 18, "023975": 18, "836278": 18, "577358": 18, "linear_166": 18, "860619": 18, "259792": 18, "493614": 18, "linear_167": 18, "380934": 18, "496160": 18, "107042": 18, "linear_168": 18, "691216": 18, "733317": 18, "831076": 18, "linear_169": 18, "723948": 18, "952728": 18, "129707": 18, "linear_171": 18, "034811": 18, "366547": 18, "665123": 18, "linear_170": 18, "356277": 18, "710501": 18, "linear_172": 18, "556884": 18, "729481": 18, "166058": 18, "linear_173": 18, "033039": 18, "207264": 18, "442120": 18, "linear_174": 18, "597379": 18, "658676": 18, "768131": 18, "linear_2": [18, 19], "293503": 18, "305265": 18, "877850": 18, "linear_1": [18, 19], "812222": 18, "766452": 18, "487047": 18, "linear_3": [18, 19], "999999": 18, "999755": 18, "031174": 18, "wish": [18, 19], "955k": 18, "18k": 18, "inparam": [18, 19], "inbin": [18, 19], "outparam": [18, 19], "outbin": [18, 19], "99m": 18, "78k": 18, "774k": [18, 19], "496": [18, 19, 30, 34], "replac": [18, 19], "774": [18, 19], "linear": [18, 19, 27], "convolut": [18, 19, 35, 43, 46], "exact": [18, 19], "4x": [18, 19], "comparison": 18, "468000": [19, 23, 45], "lstm_transducer_stateless2": [19, 23, 45], "862": 19, "222": [19, 28, 30], "865": 19, "is_pnnx": 19, "62e404dd3f3a811d73e424199b3408e309c06e1a": [19, 20], "mon": [19, 20], "6d7a559": [19, 20], "feb": [19, 20, 27], "147": [19, 20], "rnn_hidden_s": 19, "aux_layer_period": 19, "235": 19, "239": [19, 27], "472": 19, "595": 19, "324": 19, "83137520": 19, "596": 19, "325": 19, "257024": 19, "781812": 19, "327": 19, "84176356": 19, "182": [19, 20, 25, 34], "158": 19, "183": [19, 38, 39], "335": 19, "101": 19, "tracerwarn": [19, 20], "boolean": [19, 20], "caus": [19, 20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "incorrect": [19, 20, 27], "flow": [19, 20], "constant": [19, 20], "futur": [19, 20, 27, 48], "need_pad": 19, "bool": 19, "259": [19, 25], "180": [19, 25, 30], "339": 19, "304": 19, "207": [19, 28, 30], "84": [19, 25], "324m": 19, "321": [19, 25], "107": [19, 34], "318m": 19, "159m": 19, "21k": 19, "159": [19, 30, 41], "37": [19, 25, 27, 30, 38], "861": 19, "266": [19, 20, 30, 34], "431": 19, "342": 19, "343": 19, "267": [19, 27, 38, 39], "379": 19, "268": [19, 30, 34], "317m": 19, "317": 19, "conv_15": 19, "930708": 19, "972025": 19, "conv_16": 19, "978855": 19, "031788": 19, "456645": 19, "conv_17": 19, "868437": 19, "830528": 19, "218575": 19, "linear_18": 19, "107259": 19, "194808": 19, "293236": 19, "linear_19": 19, "193777": 19, "634748": 19, "401705": 19, "linear_20": 19, "259933": 19, "606617": 19, "722160": 19, "linear_21": 19, "186600": 19, "790260": 19, "512129": 19, "linear_22": 19, "759041": 19, "265832": 19, "050053": 19, "linear_23": 19, "931209": 19, "099090": 19, "979767": 19, "linear_24": 19, "324160": 19, "215561": 19, "321835": 19, "linear_25": 19, "800708": 19, "599352": 19, "284134": 19, "linear_26": 19, "492444": 19, "153369": 19, "274391": 19, "linear_27": 19, "660161": 19, "720994": 19, "674126": 19, "linear_28": 19, "415265": 19, "174434": 19, "007133": 19, "linear_29": 19, "038418": 19, "118534": 19, "724262": 19, "linear_30": 19, "072084": 19, "936867": 19, "259155": 19, "linear_31": 19, "342712": 19, "599489": 19, "282787": 19, "linear_32": 19, "340535": 19, "120308": 19, "701103": 19, "linear_33": 19, "846987": 19, "630030": 19, "985939": 19, "linear_34": 19, "686298": 19, "204571": 19, "607586": 19, "linear_35": 19, "904821": 19, "575518": 19, "756420": 19, "linear_36": 19, "806659": 19, "585589": 19, "118401": 19, "linear_37": 19, "402340": 19, "047157": 19, "162680": 19, "linear_38": 19, "174589": 19, "923361": 19, "030258": 19, "linear_39": 19, "178576": 19, "556058": 19, "807705": 19, "linear_40": 19, "901954": 19, "301267": 19, "956539": 19, "linear_41": 19, "839805": 19, "597429": 19, "716181": 19, "linear_42": 19, "178945": 19, "651595": 19, "895699": 19, "829245": 19, "627592": 19, "637907": 19, "746186": 19, "255032": 19, "167313": 19, "000000": 19, "999756": 19, "031013": 19, "345k": 19, "17k": 19, "218m": 19, "counterpart": 19, "bit": [19, 25, 27, 28, 30, 34, 41], "4532": 19, "feedforward": [20, 27, 33, 46], "384": [20, 30], "192": [20, 30], "unmask": 20, "downsampl": [20, 26], "factor": [20, 25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "473": [20, 30], "246": [20, 27, 30, 38, 39], "477": 20, "warm_step": 20, "2000": [20, 28], "feedforward_dim": 20, "attention_dim": [20, 25, 27, 30], "encoder_unmasked_dim": 20, "zipformer_downsampling_factor": 20, "decode_chunk_len": 20, "257": [20, 27, 38, 39], "023": 20, "zipformer2": 20, "419": 20, "At": [20, 25, 30], "stack": 20, "downsampling_factor": 20, "037": 20, "655": 20, "346": 20, "68944004": 20, "347": 20, "260096": 20, "348": [20, 38], "716276": 20, "656": [20, 30], "349": 20, "69920376": 20, "351": 20, "353": 20, "174": [20, 30], "175": 20, "1344": 20, "assert": 20, "cached_len": 20, "num_lay": 20, "1348": 20, "cached_avg": 20, "1352": 20, "cached_kei": 20, "1356": 20, "cached_v": 20, "1360": 20, "cached_val2": 20, "1364": 20, "cached_conv1": 20, "1368": 20, "cached_conv2": 20, "1373": 20, "left_context_len": 20, "1884": 20, "x_size": 20, "2442": 20, "2449": 20, "2469": 20, "2473": 20, "2483": 20, "kv_len": 20, "k": [20, 33, 38, 39, 45, 46, 47], "2570": 20, "attn_output": 20, "bsz": 20, "num_head": 20, "seq_len": 20, "head_dim": 20, "2926": 20, "lorder": 20, "2652": 20, "2653": 20, "embed_dim": 20, "2666": 20, "1543": 20, "in_x_siz": 20, "1637": 20, "1643": 20, "in_channel": 20, "1571": 20, "1763": 20, "src1": 20, "src2": 20, "1779": 20, "dim1": 20, "1780": 20, "dim2": 20, "_trace": 20, "958": 20, "tracer": 20, "instead": [20, 27, 46], "tupl": 20, "namedtupl": 20, "absolut": 20, "know": [20, 31], "side": 20, "allow": [20, 33, 46], "behavior": [20, 27], "_c": 20, "_create_method_from_trac": 20, "646": 20, "357": 20, "102": [20, 25], "embedding_out": 20, "686": 20, "361": [20, 30, 34], "735": 20, "69": 20, "269m": 20, "269": [20, 25, 38, 39], "725": [20, 34], "1022k": 20, "266m": 20, "8m": 20, "509k": 20, "133m": 20, "152k": 20, "4m": 20, "1022": 20, "133": 20, "509": 20, "260": [20, 30], "360": 20, "365": 20, "280": [20, 30], "372": [20, 25], "state": [20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "026": 20, "410": 20, "411": [20, 30], "2028": 20, "2547": 20, "2029": 20, "23316": 20, "23317": 20, "23318": 20, "23319": 20, "23320": 20, "amount": [20, 26], "pad": [20, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "conv2dsubsampl": 20, "arrai": 20, "23300": 20, "element": 20, "onnx_pretrain": 21, "onnxruntim": 21, "separ": 21, "deploi": [21, 25, 30], "repo_url": 21, "basenam": 21, "cpu_jit": [22, 25, 30, 33, 35, 36, 46, 47], "confus": 22, "move": [22, 33, 35, 36, 46, 47], "why": 22, "streaming_asr": [22, 23, 45, 46, 47], "conv_emform": 22, "offline_asr": [22, 33], "jit_pretrain": [23, 35, 36, 45], "baz": 23, "1best": [25, 28, 30, 34, 35, 36, 38, 39], "automag": [25, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "stop": [25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "By": [25, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "musan": [25, 28, 30, 31, 33, 35, 36, 45, 46, 47], "thei": [25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "intal": [25, 28], "apt": [25, 28], "permiss": [25, 28], "commandlin": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "quit": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "experi": [25, 27, 28, 30, 31, 33, 35, 36, 41, 45, 46, 47], "world": [25, 27, 28, 30, 31, 33, 34, 35, 36, 45, 46, 47], "multi": [25, 27, 28, 30, 31, 33, 35, 36, 43, 45, 46, 47], "machin": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "ddp": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "implement": [25, 27, 28, 30, 31, 33, 35, 36, 43, 45, 46, 47], "utter": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "oom": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "due": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "decai": [25, 28, 30, 35, 36, 45], "warmup": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "function": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "get_param": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "directli": [25, 27, 28, 30, 31, 33, 35, 36, 45, 46, 47], "perturb": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "actual": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "3x150": [25, 27, 28], "450": [25, 27, 28], "visual": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "logdir": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "labelsmooth": 25, "tensorflow": [25, 27, 28, 30, 33, 35, 36, 41, 45, 46], "press": [25, 27, 28, 30, 33, 35, 36, 41, 45, 46, 47], "ctrl": [25, 27, 28, 30, 33, 35, 36, 41, 45, 46, 47], "engw8ksktzqs24zbv5dgcg": 25, "2021": [25, 28, 30, 34, 38, 39, 41], "22t11": 25, "scan": [25, 27, 28, 30, 33, 41, 45, 46], "116068": 25, "scalar": [25, 27, 28, 30, 33, 41, 45, 46], "listen": [25, 27, 28, 33, 41, 45, 46], "url": [25, 27, 28, 30, 33, 35, 36, 41, 45, 46], "xxxx": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "saw": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "consol": [25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "avoid": [25, 27, 30], "nbest": [25, 30, 36], "lattic": [25, 28, 30, 33, 34, 38, 39, 46, 47], "uniqu": [25, 30, 33, 46, 47], "pkufool": [25, 28, 34], "icefall_asr_aishell_conformer_ctc": 25, "transcrib": [25, 27, 28, 30], "v1": [25, 28, 30, 34, 38, 39], "lang_char": [25, 27], "bac009s0764w0121": [25, 27, 28], "bac009s0764w0122": [25, 27, 28], "bac009s0764w0123": [25, 27, 28], "tran": [25, 28, 30, 34, 38, 39], "graph": [25, 28, 30, 33, 34, 38, 39, 46, 47], "id": [25, 28, 30, 34, 38, 39], "conveni": [25, 28, 30, 31], "eo": [25, 28, 30], "soxi": [25, 27, 28, 30, 34, 41], "sampl": [25, 27, 28, 30, 34, 35, 41, 46, 47], "precis": [25, 27, 28, 30, 33, 34, 41, 46, 47], "67263": [25, 27, 28], "cdda": [25, 27, 28, 30, 34, 41], "sector": [25, 27, 28, 30, 34, 41], "135k": [25, 27, 28], "256k": [25, 27, 28, 30], "sign": [25, 27, 28, 30, 41], "integ": [25, 27, 28, 30, 41], "pcm": [25, 27, 28, 30, 41], "65840": [25, 27, 28], "308": [25, 27, 28], "625": [25, 27, 28], "132k": [25, 27, 28], "64000": [25, 27, 28], "300": [25, 27, 28, 30, 31, 33, 46], "128k": [25, 27, 28, 41], "displai": [25, 27, 28, 30], "topologi": [25, 30], "num_decoder_lay": [25, 30], "vgg_frontend": [25, 27, 30], "use_feat_batchnorm": [25, 30], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 25, "sun": 25, "sep": 25, "33cfe45": 25, "d57a873": 25, "nov": [25, 30], "hw": 25, "kangwei": 25, "icefall_aishell3": 25, "k2_releas": 25, "tokens_fil": 25, "words_fil": [25, 30, 41], "num_path": [25, 30, 33, 46, 47], "ngram_lm_scal": [25, 30], "attention_decoder_scal": [25, 30], "nbest_scal": [25, 30], "sos_id": [25, 30], "eos_id": [25, 30], "num_class": [25, 30, 41], "4336": [25, 27], "131": [25, 30], "134": 25, "138": [25, 27], "293": [25, 30], "369": [25, 30], "\u751a": [25, 27], "\u81f3": [25, 27], "\u51fa": [25, 27], "\u73b0": [25, 27], "\u4ea4": [25, 27], "\u6613": [25, 27], "\u51e0": [25, 27], "\u4e4e": [25, 27], "\u505c": [25, 27], "\u6b62": 25, "\u7684": [25, 27, 28], "\u60c5": [25, 27], "\u51b5": [25, 27], "\u4e00": [25, 27], "\u4e8c": [25, 27], "\u7ebf": [25, 27, 28], "\u57ce": [25, 27], "\u5e02": [25, 27], "\u867d": [25, 27], "\u7136": [25, 27], "\u4e5f": [25, 27, 28], "\u5904": [25, 27], "\u4e8e": [25, 27], "\u8c03": [25, 27], "\u6574": [25, 27], "\u4e2d": [25, 27, 28], "\u4f46": [25, 27, 28], "\u56e0": [25, 27], "\u4e3a": [25, 27], "\u805a": [25, 27], "\u96c6": [25, 27], "\u4e86": [25, 27, 28], "\u8fc7": [25, 27], "\u591a": [25, 27], "\u516c": [25, 27], "\u5171": [25, 27], "\u8d44": [25, 27], "\u6e90": [25, 27], "371": 25, "683": 25, "684": [25, 41], "651": [25, 41], "654": 25, "659": 25, "752": 25, "887": 25, "340": 25, "370": 25, "\u751a\u81f3": [25, 28], "\u51fa\u73b0": [25, 28], "\u4ea4\u6613": [25, 28], "\u51e0\u4e4e": [25, 28], "\u505c\u6b62": 25, "\u60c5\u51b5": [25, 28], "\u4e00\u4e8c": [25, 28], "\u57ce\u5e02": [25, 28], "\u867d\u7136": [25, 28], "\u5904\u4e8e": [25, 28], "\u8c03\u6574": [25, 28], "\u56e0\u4e3a": [25, 28], "\u805a\u96c6": [25, 28], "\u8fc7\u591a": [25, 28], "\u516c\u5171": [25, 28], "\u8d44\u6e90": [25, 28], "recor": [25, 30], "highest": [25, 30], "966": 25, "821": 25, "822": 25, "826": 25, "916": 25, "345": 25, "888": 25, "889": 25, "limit": [25, 27, 30, 43, 46], "upgrad": [25, 30], "pro": [25, 30], "NOT": [25, 27, 30, 41], "checkout": [25, 30], "hlg_decod": [25, 30], "four": [25, 30], "messag": [25, 30, 33, 35, 36, 45, 46, 47], "nn_model": [25, 30], "use_gpu": [25, 30], "word_tabl": [25, 30], "caution": [25, 30], "forward": [25, 30, 35], "cu": [25, 30], "int": [25, 30], "char": [25, 30], "98": 25, "150": [25, 30], "693": [25, 38], "165": [25, 30], "nnet_output": [25, 30], "185": [25, 30, 41], "217": [25, 30], "mandarin": 26, "beij": 26, "shell": 26, "technologi": 26, "ltd": 26, "peopl": 26, "accent": 26, "area": 26, "china": 26, "invit": 26, "particip": 26, "conduct": 26, "indoor": 26, "high": 26, "fidel": 26, "microphon": 26, "16khz": 26, "manual": 26, "through": 26, "profession": 26, "annot": 26, "inspect": 26, "free": [26, 31, 45], "academ": 26, "moder": 26, "research": 26, "field": 26, "openslr": 26, "ctc": [26, 29, 32, 36, 37, 40], "stateless": [26, 29, 33, 45, 46, 47], "conv1d": [27, 33, 45, 46, 47], "nn": [27, 33, 35, 36, 45, 46, 47], "tanh": 27, "borrow": 27, "ieeexplor": 27, "ieee": 27, "stamp": 27, "jsp": 27, "arnumb": 27, "9054419": 27, "predict": [27, 31, 33, 45, 46, 47], "charact": 27, "unit": 27, "vocabulari": 27, "87939824": 27, "optimized_transduc": 27, "technqiu": 27, "end": [27, 33, 35, 36, 41, 45, 46, 47], "furthermor": 27, "maximum": 27, "emit": 27, "per": [27, 33, 46, 47], "simplifi": [27, 43], "significantli": 27, "degrad": 27, "exactli": 27, "unprun": 27, "advantag": 27, "minim": 27, "pruned_transducer_stateless": [27, 33, 43, 46], "altern": 27, "though": 27, "transducer_stateless_modifi": 27, "pr": 27, "ram": 27, "tri": 27, "prob": [27, 45], "219": [27, 30], "lagz6hrcqxoigbfd5e0y3q": 27, "03t14": 27, "8477": 27, "250": [27, 34], "sym": [27, 33, 46, 47], "beam_search": [27, 33, 46, 47], "decoding_method": 27, "beam_4": 27, "ensur": 27, "poor": 27, "531": [27, 28], "994": [27, 30], "027": 27, "encoder_out_dim": 27, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 27, "50d2281": 27, "mar": 27, "0815224919": 27, "75d558775b": 27, "mmnv8": 27, "72": [27, 30], "248": 27, "878": [27, 39], "880": 27, "891": 27, "113": [27, 30], "userwarn": 27, "__floordiv__": 27, "round": 27, "toward": 27, "trunc": 27, "floor": 27, "keep": [27, 33, 46, 47], "div": 27, "b": [27, 30, 38, 39], "rounding_mod": 27, "divis": 27, "x_len": 27, "163": [27, 30], "\u6ede": 27, "322": 27, "759": 27, "760": 27, "919": 27, "922": 27, "929": 27, "046": 27, "047": 27, "319": [27, 30], "798": 27, "831": [27, 39], "214": [27, 30], "215": [27, 30, 34], "402": 27, "topk_hyp_index": 27, "topk_index": 27, "logit": 27, "583": [27, 39], "lji9mwuorlow3jkdhxwk8a": 28, "13t11": 28, "4454": 28, "icefall_asr_aishell_tdnn_lstm_ctc": 28, "858": [28, 30], "389": [28, 30], "154": 28, "161": [28, 30], "536": 28, "539": 28, "917": 28, "129": 28, "\u505c\u6ede": 28, "mmi": [29, 32], "blank": [29, 32], "skip": [29, 31, 32, 33, 45, 46, 47], "distil": [29, 32], "hubert": [29, 32], "ligru": [29, 37], "full": [30, 31, 33, 35, 36, 45, 46, 47], "libri": [30, 31, 33, 35, 36, 45, 46, 47], "subset": [30, 33, 35, 36, 45, 46, 47], "3x960": [30, 33, 35, 36, 45, 46, 47], "2880": [30, 33, 35, 36, 45, 46, 47], "lzgnetjwrxc3yghnmd4kpw": 30, "24t16": 30, "4540": 30, "sentenc": 30, "piec": 30, "And": [30, 33, 35, 36, 45, 46, 47], "neither": 30, "nor": 30, "5000": 30, "033": 30, "537": 30, "538": 30, "full_libri": [30, 31], "464": 30, "548": 30, "776": 30, "652": [30, 41], "109226120": 30, "714": [30, 38], "206": 30, "944": 30, "1328": 30, "443": [30, 34], "2563": 30, "494": 30, "592": 30, "1715": 30, "52576": 30, "128": 30, "1424": 30, "807": 30, "506": 30, "808": [30, 38], "362": 30, "1477": 30, "2922": 30, "208": 30, "4295": 30, "52343": 30, "396": 30, "3584": 30, "432": 30, "680": [30, 38], "_pickl": 30, "unpicklingerror": 30, "invalid": 30, "hlg_modifi": 30, "g_4_gram": [30, 34, 38, 39], "sentencepiec": 30, "875": [30, 34], "212k": 30, "267440": [30, 34], "1253": [30, 34], "535k": 30, "83": [30, 34], "77200": [30, 34], "154k": 30, "554": 30, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 30, "8d93169": 30, "601": 30, "025": 30, "broffel": 30, "osom": 30, "723": 30, "775": 30, "881": 30, "234": 30, "571": 30, "whole": [30, 34, 38, 39, 46, 47], "857": 30, "979": 30, "055": 30, "117": 30, "051": 30, "363": 30, "959": [30, 39], "546": 30, "598": 30, "599": [30, 34], "833": 30, "834": 30, "915": 30, "076": 30, "110": 30, "397": 30, "999": [30, 33, 46, 47], "concaten": 30, "bucket": 30, "sampler": 30, "1000": 30, "ctc_decod": 30, "ngram_lm_rescor": 30, "attention_rescor": 30, "kind": [30, 33, 35, 36, 45, 46, 47], "105": 30, "125": [30, 41], "136": 30, "228": 30, "144": 30, "543": 30, "topo": 30, "547": 30, "729": 30, "703": 30, "545": 30, "279": 30, "122": 30, "126": 30, "135": [30, 41], "153": [30, 41], "945": 30, "475": 30, "191": [30, 38, 39], "398": 30, "515": 30, "deseri": 30, "441": 30, "fsaclass": 30, "loadfsa": 30, "const": 30, "string": 30, "c10": 30, "ignor": 30, "dummi": 30, "589": 30, "attention_scal": 30, "162": 30, "169": [30, 38, 39], "188": 30, "984": 30, "624": 30, "519": [30, 39], "632": 30, "645": [30, 41], "243": 30, "303": 30, "179": 30, "knowledg": 31, "vector": 31, "mvq": 31, "kd": 31, "pruned_transducer_stateless4": [31, 33, 43, 46], "theoret": 31, "applic": 31, "minor": 31, "out": 31, "thing": 31, "distillation_with_hubert": 31, "Of": 31, "cours": 31, "xl": 31, "proce": 31, "960h": [31, 35], "use_extracted_codebook": 31, "augment": 31, "th": [31, 38, 39], "fine": 31, "embedding_lay": 31, "num_codebook": 31, "under": 31, "vq_fbank_layer36_cb8": 31, "whola": 31, "snippet": 31, "echo": 31, "awk": 31, "split": 31, "_": 31, "pruned_transducer_stateless6": 31, "12359": 31, "spec": 31, "aug": 31, "warp": 31, "paid": 31, "suitabl": [33, 45, 46, 47], "pruned_transducer_stateless2": [33, 43, 46], "pruned_transducer_stateless5": [33, 43, 46], "scroll": [33, 35, 36, 45, 46, 47], "arxiv": [33, 45, 46, 47], "2206": [33, 45, 46, 47], "13236": [33, 45, 46, 47], "rework": [33, 43, 46], "daniel": [33, 46, 47], "joint": [33, 45, 46, 47], "contrari": [33, 45, 46, 47], "convent": [33, 45, 46, 47], "recurr": [33, 45, 46, 47], "2x": [33, 46, 47], "littl": [33, 46], "436000": [33, 35, 36, 45, 46, 47], "438000": [33, 35, 36, 45, 46, 47], "qogspbgsr8kzcrmmie9jgw": 33, "20t15": [33, 45, 46], "4468": [33, 45, 46], "210171": [33, 45, 46], "access": [33, 35, 36, 45, 46, 47], "googl": [33, 35, 36, 45, 46, 47], "6008": [33, 35, 36, 45, 46, 47], "localhost": [33, 35, 36, 45, 46, 47], "expos": [33, 35, 36, 45, 46, 47], "proxi": [33, 35, 36, 45, 46, 47], "bind_al": [33, 35, 36, 45, 46, 47], "fast_beam_search": [33, 35, 45, 46, 47], "474000": [33, 45, 46, 47], "largest": [33, 46, 47], "posterior": [33, 35, 46, 47], "algorithm": [33, 46, 47], "pdf": [33, 36, 46, 47], "1211": [33, 46, 47], "3711": [33, 46, 47], "espnet": [33, 46, 47], "net": [33, 46, 47], "beam_search_transduc": [33, 46, 47], "basicli": [33, 46, 47], "topk": [33, 46, 47], "expand": [33, 46, 47], "mode": [33, 46, 47], "being": [33, 46, 47], "hardcod": [33, 46, 47], "composit": [33, 46, 47], "log_prob": [33, 46, 47], "hard": [33, 43, 46, 47], "2211": [33, 46, 47], "00484": [33, 46, 47], "fast_beam_search_lg": [33, 46, 47], "trivial": [33, 46, 47], "fast_beam_search_nbest": [33, 46, 47], "random_path": [33, 46, 47], "shortest": [33, 46, 47], "fast_beam_search_nbest_lg": [33, 46, 47], "logic": [33, 46, 47], "smallest": [33, 45, 46, 47], "normal": [34, 38, 39, 41, 46], "icefall_asr_librispeech_tdnn": 34, "lstm_ctc": 34, "flac": 34, "116k": 34, "140k": 34, "343k": 34, "164k": 34, "105k": 34, "174k": 34, "pretraind": 34, "170": 34, "584": [34, 39], "791": 34, "245": 34, "098": 34, "099": 34, "methond": [34, 38, 39], "631": 34, "190": 34, "121": 34, "010": 34, "guidanc": 35, "bigger": 35, "simpli": 35, "discard": 35, "prevent": 35, "lconv": 35, "encourag": [35, 36, 45], "stabil": [35, 36], "doesn": 35, "warm": [35, 36], "xyozukpeqm62hbilud4upa": [35, 36], "ctc_guide_decode_b": 35, "pretrained_ctc": 35, "jit_pretrained_ctc": 35, "100h": 35, "yfyeung": 35, "wechat": 36, "zipformer_mmi": 36, "worker": [36, 45], "hp": 36, "tdnn_ligru_ctc": 38, "enough": [38, 39, 41], "luomingshuang": [38, 39], "icefall_asr_timit_tdnn_ligru_ctc": 38, "pretrained_average_9_25": 38, "fdhc0_si1559": [38, 39], "felc0_si756": [38, 39], "fmgd0_si1564": [38, 39], "ffprobe": [38, 39], "show_format": [38, 39], "nistspher": [38, 39], "database_id": [38, 39], "database_vers": [38, 39], "utterance_id": [38, 39], "dhc0_si1559": [38, 39], "sample_min": [38, 39], "4176": [38, 39], "sample_max": [38, 39], "5984": [38, 39], "bitrat": [38, 39], "pcm_s16le": [38, 39], "s16": [38, 39], "elc0_si756": [38, 39], "1546": [38, 39], "1989": [38, 39], "mgd0_si1564": [38, 39], "7626": [38, 39], "10573": [38, 39], "660": 38, "695": 38, "697": 38, "819": 38, "829": 38, "sil": [38, 39], "dh": [38, 39], "ih": [38, 39], "uw": [38, 39], "ah": [38, 39], "ii": [38, 39], "z": [38, 39], "aa": [38, 39], "ei": [38, 39], "dx": [38, 39], "d": [38, 39], "uh": [38, 39], "ng": [38, 39], "eh": [38, 39], "jh": [38, 39], "er": [38, 39], "ai": [38, 39], "hh": [38, 39], "aw": 38, "ae": [38, 39], "705": 38, "715": 38, "720": 38, "251": [38, 39], "ch": 38, "icefall_asr_timit_tdnn_lstm_ctc": 39, "pretrained_average_16_25": 39, "816": 39, "827": 39, "unk": 39, "739": 39, "971": 39, "977": 39, "978": 39, "981": 39, "ow": 39, "ykubhb5wrmosxykid1z9eg": 41, "23t23": 41, "icefall_asr_yesno_tdnn": 41, "l_disambig": 41, "lexicon_disambig": 41, "0_0_0_1_0_0_0_1": 41, "0_0_1_0_0_0_1_0": 41, "0_0_1_0_0_1_1_1": 41, "0_0_1_0_1_0_0_1": 41, "0_0_1_1_0_0_0_1": 41, "0_0_1_1_0_1_1_0": 41, "0_0_1_1_1_0_0_0": 41, "0_0_1_1_1_1_0_0": 41, "0_1_0_0_0_1_0_0": 41, "0_1_0_0_1_0_1_0": 41, "0_1_0_1_0_0_0_0": 41, "0_1_0_1_1_1_0_0": 41, "0_1_1_0_0_1_1_1": 41, "0_1_1_1_0_0_1_0": 41, "0_1_1_1_1_0_1_0": 41, "1_0_0_0_0_0_0_0": 41, "1_0_0_0_0_0_1_1": 41, "1_0_0_1_0_1_1_1": 41, "1_0_1_1_0_1_1_1": 41, "1_0_1_1_1_1_0_1": 41, "1_1_0_0_0_1_1_1": 41, "1_1_0_0_1_0_1_1": 41, "1_1_0_1_0_1_0_0": 41, "1_1_0_1_1_0_0_1": 41, "1_1_0_1_1_1_1_0": 41, "1_1_1_0_0_1_0_1": 41, "1_1_1_0_1_0_1_0": 41, "1_1_1_1_0_0_1_0": 41, "1_1_1_1_1_0_0_0": 41, "1_1_1_1_1_1_1_1": 41, "54080": 41, "507": 41, "108k": 41, "ye": 41, "hebrew": 41, "NO": 41, "119": 41, "650": 41, "139": 41, "143": 41, "198": 41, "181": 41, "186": 41, "187": 41, "213": 41, "correctli": 41, "simplest": 41, "former": 43, "idea": 43, "mask": [43, 46, 47], "wenet": 43, "did": 43, "request": 43, "metion": 43, "complic": 43, "techniqu": 43, "bank": 43, "memor": 43, "histori": 43, "introduc": 43, "variant": 43, "pruned_stateless_emformer_rnnt2": 43, "conv_emformer_transducer_stateless": 43, "ourself": 43, "mechan": 43, "onlin": 45, "lstm_transducer_stateless": 45, "lower": 45, "prepare_giga_speech": 45, "cj2vtpiwqhkn9q1tx6ptpg": 45, "dynam": [46, 47], "causal": 46, "short": [46, 47], "2012": 46, "05481": 46, "flag": 46, "indic": [46, 47], "whether": 46, "sequenc": [46, 47], "uniformli": [46, 47], "seen": [46, 47], "97vkxf80ru61cnp2alwzzg": 46, "streaming_decod": [46, 47], "wise": [46, 47], "parallel": [46, 47], "bath": [46, 47], "parallelli": [46, 47], "seem": 46, "benefit": 46, "mdoel": 46, "320m": 47, "550": 47, "scriptmodul": 47, "jit_trace_export": 47, "jit_trace_pretrain": 47, "task": 48}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": [0, 9], "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 16, 22, 23], "creat": [2, 15], "recip": [2, 48], "data": [2, 9, 15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "prepar": [2, 9, 15, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "train": [2, 9, 12, 15, 18, 19, 20, 21, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "decod": [2, 5, 6, 7, 9, 15, 16, 21, 25, 27, 28, 30, 31, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "pre": [2, 12, 18, 19, 20, 21, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "model": [2, 5, 12, 16, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "lodr": [4, 6], "rnn": 4, "transduc": [4, 6, 7, 18, 19, 20, 27, 33, 45, 46, 47], "wer": [4, 5, 6, 7, 30], "differ": [4, 6, 7], "beam": [4, 6, 7, 27], "size": [4, 6, 7], "languag": 5, "lm": [5, 6, 30], "rescor": [5, 6, 25, 30], "base": [5, 6], "method": [5, 6], "v": [5, 6], "shallow": [5, 6, 7], "fusion": [5, 6, 7], "The": [5, 6, 27], "number": [5, 6], "each": [5, 6], "field": [5, 6], "i": [5, 6], "test": [5, 6, 7, 15, 18, 19, 20], "clean": [5, 6, 7], "other": [5, 6], "time": [5, 6, 7], "docker": [8, 9], "introduct": [9, 43], "view": 9, "avail": 9, "tag": 9, "download": [9, 15, 18, 19, 20, 21, 25, 27, 28, 30, 33, 34, 35, 36, 38, 39, 41, 45, 46, 47], "imag": 9, "run": [9, 16], "gpu": 9, "cpu": 9, "yesno": [9, 40], "within": 9, "contain": 9, "updat": 9, "frequent": 10, "ask": 10, "question": 10, "faq": 10, "oserror": 10, "libtorch_hip": 10, "so": 10, "cannot": 10, "open": 10, "share": 10, "object": 10, "file": [10, 21], "directori": 10, "attributeerror": 10, "modul": 10, "distutil": 10, "ha": 10, "attribut": 10, "version": 10, "importerror": 10, "libpython3": 10, "10": 10, "1": [10, 15, 18, 19, 20, 25, 27, 28, 30], "0": [10, 15], "No": 10, "huggingfac": [11, 13], "space": 13, "youtub": [13, 15], "video": [13, 15], "icefal": [14, 15, 18, 19, 20], "content": [14, 48], "instal": [15, 18, 19, 20, 25, 27, 28, 30, 34, 38, 39], "cuda": 15, "toolkit": 15, "cudnn": 15, "torch": [15, 18, 19, 20, 22, 23, 33, 35, 36, 45, 46, 47], "torchaudio": 15, "2": [15, 18, 19, 20, 25, 27, 28, 30], "k2": 15, "3": [15, 18, 19, 20, 25, 27, 30], "lhots": 15, "4": [15, 18, 19, 20], "exampl": [15, 21, 25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "virtual": 15, "environ": 15, "5": [15, 18, 19, 20], "6": [15, 18, 19, 20], "your": 15, "export": [16, 17, 18, 19, 20, 21, 22, 23, 24, 33, 35, 36, 45, 46, 47], "state_dict": [16, 33, 35, 36, 45, 46, 47], "when": [16, 22, 23], "us": [16, 22, 23, 33, 35, 36, 45, 46, 47], "py": 16, "ncnn": [17, 18, 19, 20], "convemform": 18, "pnnx": [18, 19, 20], "via": [18, 19, 20], "jit": [18, 19, 20, 22, 23, 33, 35, 36, 45, 46, 47], "trace": [18, 19, 20, 23, 45, 47], "torchscript": [18, 19, 20], "modifi": [18, 19, 20, 27], "encod": [18, 19, 20], "sherpa": [18, 19, 20, 21, 33, 46, 47], "7": [18, 19], "option": [18, 19, 25, 28, 30, 33, 35, 36, 45, 46, 47], "int8": [18, 19], "quantiz": [18, 19], "lstm": [19, 28, 34, 39, 45], "stream": [20, 29, 42, 43, 46, 47], "zipform": [20, 35, 36, 47], "onnx": 21, "sound": 21, "script": [22, 33, 35, 36, 46, 47], "conform": [25, 30, 43], "ctc": [25, 28, 30, 34, 35, 38, 39, 41], "configur": [25, 28, 30, 33, 35, 36, 45, 46, 47], "log": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "usag": [25, 27, 28, 30, 33, 35, 36, 45, 46, 47], "case": [25, 27, 28, 30], "kaldifeat": [25, 27, 28, 30, 34, 38, 39, 41], "hlg": [25, 28, 30], "attent": [25, 30], "colab": [25, 27, 28, 30, 34, 38, 39, 41], "notebook": [25, 27, 28, 30, 34, 38, 39, 41], "deploy": [25, 30], "c": [25, 30], "aishel": 26, "stateless": 27, "loss": 27, "todo": 27, "greedi": 27, "search": 27, "tdnn": [28, 34, 38, 39, 41], "non": 29, "asr": [29, 42], "comput": 30, "n": 30, "gram": 30, "distil": 31, "hubert": 31, "codebook": 31, "index": 31, "librispeech": [32, 44], "prune": [33, 46], "statelessx": [33, 46], "pretrain": [33, 35, 36, 45, 46, 47], "deploi": [33, 46, 47], "infer": [34, 38, 39, 41], "blank": 35, "skip": 35, "mmi": 36, "timit": 37, "ligru": 38, "emform": 43, "which": 45, "simul": [46, 47], "real": [46, 47], "tabl": 48}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [27, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [15, "training"], [25, "training"], [27, "training"], [28, "training"], [30, "training"], [31, "training"], [33, "training"], [34, "training"], [35, "training"], [36, "training"], [38, "training"], [39, "training"], [41, "training"], [45, "training"], [46, "training"], [47, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [15, "decoding"], [25, "decoding"], [27, "decoding"], [28, "decoding"], [30, "decoding"], [31, "decoding"], [33, "decoding"], [34, "decoding"], [35, "decoding"], [36, "decoding"], [38, "decoding"], [39, "decoding"], [41, "decoding"], [45, "decoding"], [46, "decoding"], [47, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "LODR for RNN Transducer": [[4, "lodr-for-rnn-transducer"]], "WER of LODR with different beam sizes": [[4, "id1"]], "Decoding with language models": [[5, "decoding-with-language-models"]], "LM-rescoring-based methods vs shallow-fusion-based methods (The numbers in each field is WER on test-clean, WER on test-other and decoding time on test-clean)": [[5, "id1"], [6, "id3"]], "LM rescoring for Transducer": [[6, "lm-rescoring-for-transducer"]], "WERs of LM rescoring with different beam sizes": [[6, "id1"]], "WERs of LM rescoring + LODR with different beam sizes": [[6, "id2"]], "Shallow fusion for Transducer": [[7, "shallow-fusion-for-transducer"]], "WERs and decoding time (on test-clean) of shallow fusion with different beam sizes": [[7, "id2"]], "Docker": [[8, "docker"]], "Introduction": [[9, "introduction"], [43, "introduction"]], "View available tags": [[9, "view-available-tags"]], "Download a docker image": [[9, "download-a-docker-image"]], "Run a docker image with GPU": [[9, "run-a-docker-image-with-gpu"]], "Run a docker image with CPU": [[9, "run-a-docker-image-with-cpu"]], "Run yesno within a docker container": [[9, "run-yesno-within-a-docker-container"]], "Update the code": [[9, "update-the-code"]], "Data preparation": [[9, "data-preparation"], [15, "data-preparation"], [25, "data-preparation"], [28, "data-preparation"], [30, "data-preparation"], [31, "data-preparation"], [33, "data-preparation"], [34, "data-preparation"], [35, "data-preparation"], [36, "data-preparation"], [38, "data-preparation"], [39, "data-preparation"], [41, "data-preparation"], [45, "data-preparation"], [46, "data-preparation"], [47, "data-preparation"]], "Frequently Asked Questions (FAQs)": [[10, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[10, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[10, "attributeerror-module-distutils-has-no-attribute-version"]], "ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory": [[10, "importerror-libpython3-10-so-1-0-cannot-open-shared-object-file-no-such-file-or-directory"]], "Huggingface": [[11, "huggingface"]], "Pre-trained models": [[12, "pre-trained-models"]], "Huggingface spaces": [[13, "huggingface-spaces"]], "YouTube Video": [[13, "youtube-video"], [15, "youtube-video"]], "Icefall": [[14, "icefall"]], "Contents:": [[14, null]], "Installation": [[15, "installation"]], "(0) Install CUDA toolkit and cuDNN": [[15, "install-cuda-toolkit-and-cudnn"]], "(1) Install torch and torchaudio": [[15, "install-torch-and-torchaudio"]], "(2) Install k2": [[15, "install-k2"]], "(3) Install lhotse": [[15, "install-lhotse"]], "(4) Download icefall": [[15, "download-icefall"]], "Installation example": [[15, "installation-example"]], "(1) Create a virtual environment": [[15, "create-a-virtual-environment"]], "(2) Install CUDA toolkit and cuDNN": [[15, "id1"]], "(3) Install torch and torchaudio": [[15, "id2"]], "(4) Install k2": [[15, "id3"]], "(5) Install lhotse": [[15, "id5"]], "(6) Download icefall": [[15, "id6"]], "Test Your Installation": [[15, "test-your-installation"]], "Export model.state_dict()": [[16, "export-model-state-dict"], [33, "export-model-state-dict"], [35, "export-model-state-dict"], [36, "export-model-state-dict"], [45, "export-model-state-dict"], [46, "export-model-state-dict"], [47, "export-model-state-dict"]], "When to use it": [[16, "when-to-use-it"], [22, "when-to-use-it"], [23, "when-to-use-it"]], "How to export": [[16, "how-to-export"], [22, "how-to-export"], [23, "how-to-export"]], "How to use the exported model": [[16, "how-to-use-the-exported-model"], [22, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[16, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[17, "export-to-ncnn"]], "Export ConvEmformer transducer models to ncnn": [[18, "export-convemformer-transducer-models-to-ncnn"]], "1. Download the pre-trained model": [[18, "download-the-pre-trained-model"], [19, "download-the-pre-trained-model"], [20, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[18, "install-ncnn-and-pnnx"], [19, "install-ncnn-and-pnnx"], [20, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[18, "export-the-model-via-torch-jit-trace"], [19, "export-the-model-via-torch-jit-trace"], [20, "export-the-model-via-torch-jit-trace"]], "4. Export torchscript model via pnnx": [[18, "export-torchscript-model-via-pnnx"], [19, "export-torchscript-model-via-pnnx"], [20, "export-torchscript-model-via-pnnx"]], "5. Test the exported models in icefall": [[18, "test-the-exported-models-in-icefall"], [19, "test-the-exported-models-in-icefall"], [20, "test-the-exported-models-in-icefall"]], "6. Modify the exported encoder for sherpa-ncnn": [[18, "modify-the-exported-encoder-for-sherpa-ncnn"], [19, "modify-the-exported-encoder-for-sherpa-ncnn"], [20, "modify-the-exported-encoder-for-sherpa-ncnn"]], "7. (Optional) int8 quantization with sherpa-ncnn": [[18, "optional-int8-quantization-with-sherpa-ncnn"], [19, "optional-int8-quantization-with-sherpa-ncnn"]], "Export LSTM transducer models to ncnn": [[19, "export-lstm-transducer-models-to-ncnn"]], "Export streaming Zipformer transducer models to ncnn": [[20, "export-streaming-zipformer-transducer-models-to-ncnn"]], "Export to ONNX": [[21, "export-to-onnx"]], "sherpa-onnx": [[21, "sherpa-onnx"]], "Example": [[21, "example"]], "Download the pre-trained model": [[21, "download-the-pre-trained-model"], [25, "download-the-pre-trained-model"], [27, "download-the-pre-trained-model"], [28, "download-the-pre-trained-model"], [30, "download-the-pre-trained-model"], [34, "download-the-pre-trained-model"], [38, "download-the-pre-trained-model"], [39, "download-the-pre-trained-model"], [41, "download-the-pre-trained-model"]], "Export the model to ONNX": [[21, "export-the-model-to-onnx"]], "Decode sound files with exported ONNX models": [[21, "decode-sound-files-with-exported-onnx-models"]], "Export model with torch.jit.script()": [[22, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[23, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[23, "how-to-use-the-exported-models"]], "Model export": [[24, "model-export"]], "Conformer CTC": [[25, "conformer-ctc"], [30, "conformer-ctc"]], "Configurable options": [[25, "configurable-options"], [28, "configurable-options"], [30, "configurable-options"], [33, "configurable-options"], [35, "configurable-options"], [36, "configurable-options"], [45, "configurable-options"], [46, "configurable-options"], [47, "configurable-options"]], "Pre-configured options": [[25, "pre-configured-options"], [28, "pre-configured-options"], [30, "pre-configured-options"], [33, "pre-configured-options"], [35, "pre-configured-options"], [36, "pre-configured-options"], [45, "pre-configured-options"], [46, "pre-configured-options"], [47, "pre-configured-options"]], "Training logs": [[25, "training-logs"], [27, "training-logs"], [28, "training-logs"], [30, "training-logs"], [33, "training-logs"], [35, "training-logs"], [36, "training-logs"], [45, "training-logs"], [46, "training-logs"], [47, "training-logs"]], "Usage examples": [[25, "usage-examples"], [27, "usage-examples"], [28, "usage-examples"], [30, "usage-examples"]], "Case 1": [[25, "case-1"], [27, "case-1"], [28, "case-1"], [30, "case-1"]], "Case 2": [[25, "case-2"], [27, "case-2"], [28, "case-2"], [30, "case-2"]], "Case 3": [[25, "case-3"], [27, "case-3"], [30, "case-3"]], "Pre-trained Model": [[25, "pre-trained-model"], [27, "pre-trained-model"], [28, "pre-trained-model"], [30, "pre-trained-model"], [34, "pre-trained-model"], [38, "pre-trained-model"], [39, "pre-trained-model"], [41, "pre-trained-model"]], "Install kaldifeat": [[25, "install-kaldifeat"], [27, "install-kaldifeat"], [28, "install-kaldifeat"], [30, "install-kaldifeat"], [34, "install-kaldifeat"], [38, "install-kaldifeat"], [39, "install-kaldifeat"]], "Usage": [[25, "usage"], [27, "usage"], [28, "usage"], [30, "usage"]], "CTC decoding": [[25, "ctc-decoding"], [30, "ctc-decoding"], [30, "id2"]], "HLG decoding": [[25, "hlg-decoding"], [25, "id2"], [28, "hlg-decoding"], [30, "hlg-decoding"], [30, "id3"]], "HLG decoding + attention decoder rescoring": [[25, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[25, "colab-notebook"], [27, "colab-notebook"], [28, "colab-notebook"], [30, "colab-notebook"], [34, "colab-notebook"], [38, "colab-notebook"], [39, "colab-notebook"], [41, "colab-notebook"]], "Deployment with C++": [[25, "deployment-with-c"], [30, "deployment-with-c"]], "aishell": [[26, "aishell"]], "Stateless Transducer": [[27, "stateless-transducer"]], "The Model": [[27, "the-model"]], "The Loss": [[27, "the-loss"]], "Todo": [[27, "id1"]], "Greedy search": [[27, "greedy-search"]], "Beam search": [[27, "beam-search"]], "Modified Beam search": [[27, "modified-beam-search"]], "TDNN-LSTM CTC": [[28, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[29, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[30, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[30, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[30, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[30, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[30, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[31, "distillation-with-hubert"]], "Codebook index preparation": [[31, "codebook-index-preparation"]], "LibriSpeech": [[32, "librispeech"], [44, "librispeech"]], "Pruned transducer statelessX": [[33, "pruned-transducer-statelessx"], [46, "pruned-transducer-statelessx"]], "Usage example": [[33, "usage-example"], [35, "usage-example"], [36, "usage-example"], [45, "usage-example"], [46, "usage-example"], [47, "usage-example"]], "Export Model": [[33, "export-model"], [46, "export-model"], [47, "export-model"]], "Export model using torch.jit.script()": [[33, "export-model-using-torch-jit-script"], [35, "export-model-using-torch-jit-script"], [36, "export-model-using-torch-jit-script"], [46, "export-model-using-torch-jit-script"], [47, "export-model-using-torch-jit-script"]], "Download pretrained models": [[33, "download-pretrained-models"], [35, "download-pretrained-models"], [36, "download-pretrained-models"], [45, "download-pretrained-models"], [46, "download-pretrained-models"], [47, "download-pretrained-models"]], "Deploy with Sherpa": [[33, "deploy-with-sherpa"], [46, "deploy-with-sherpa"], [47, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[34, "tdnn-lstm-ctc"], [39, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[34, "inference-with-a-pre-trained-model"], [38, "inference-with-a-pre-trained-model"], [39, "inference-with-a-pre-trained-model"], [41, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[35, "zipformer-ctc-blank-skip"]], "Export models": [[35, "export-models"], [36, "export-models"], [45, "export-models"]], "Zipformer MMI": [[36, "zipformer-mmi"]], "TIMIT": [[37, "timit"]], "TDNN-LiGRU-CTC": [[38, "tdnn-ligru-ctc"]], "YesNo": [[40, "yesno"]], "TDNN-CTC": [[41, "tdnn-ctc"]], "Download kaldifeat": [[41, "download-kaldifeat"]], "Streaming ASR": [[42, "streaming-asr"]], "Streaming Conformer": [[43, "streaming-conformer"]], "Streaming Emformer": [[43, "streaming-emformer"]], "LSTM Transducer": [[45, "lstm-transducer"]], "Which model to use": [[45, "which-model-to-use"]], "Export model using torch.jit.trace()": [[45, "export-model-using-torch-jit-trace"], [47, "export-model-using-torch-jit-trace"]], "Simulate streaming decoding": [[46, "simulate-streaming-decoding"], [47, "simulate-streaming-decoding"]], "Real streaming decoding": [[46, "real-streaming-decoding"], [47, "real-streaming-decoding"]], "Zipformer Transducer": [[47, "zipformer-transducer"]], "Recipes": [[48, "recipes"]], "Table of Contents": [[48, null]]}, "indexentries": {}})