mirror of
https://github.com/k2-fsa/icefall.git
synced 2025-08-09 18:12:19 +00:00
1 line
80 KiB
JavaScript
1 line
80 KiB
JavaScript
Search.setIndex({"docnames": ["contributing/code-style", "contributing/doc", "contributing/how-to-create-a-recipe", "contributing/index", "faqs", "huggingface/index", "huggingface/pretrained-models", "huggingface/spaces", "index", "installation/index", "model-export/export-model-state-dict", "model-export/export-ncnn", "model-export/export-onnx", "model-export/export-with-torch-jit-script", "model-export/export-with-torch-jit-trace", "model-export/index", "recipes/Non-streaming-ASR/aishell/conformer_ctc", "recipes/Non-streaming-ASR/aishell/index", "recipes/Non-streaming-ASR/aishell/stateless_transducer", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/index", "recipes/Non-streaming-ASR/librispeech/conformer_ctc", "recipes/Non-streaming-ASR/librispeech/distillation", "recipes/Non-streaming-ASR/librispeech/index", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi", "recipes/Non-streaming-ASR/timit/index", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc", "recipes/Non-streaming-ASR/yesno/index", "recipes/Non-streaming-ASR/yesno/tdnn", "recipes/Streaming-ASR/index", "recipes/Streaming-ASR/introduction", "recipes/Streaming-ASR/librispeech/index", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless", "recipes/Streaming-ASR/librispeech/zipformer_transducer", "recipes/index"], "filenames": ["contributing/code-style.rst", "contributing/doc.rst", "contributing/how-to-create-a-recipe.rst", "contributing/index.rst", "faqs.rst", "huggingface/index.rst", "huggingface/pretrained-models.rst", "huggingface/spaces.rst", "index.rst", "installation/index.rst", "model-export/export-model-state-dict.rst", "model-export/export-ncnn.rst", "model-export/export-onnx.rst", "model-export/export-with-torch-jit-script.rst", "model-export/export-with-torch-jit-trace.rst", "model-export/index.rst", "recipes/Non-streaming-ASR/aishell/conformer_ctc.rst", "recipes/Non-streaming-ASR/aishell/index.rst", "recipes/Non-streaming-ASR/aishell/stateless_transducer.rst", "recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/index.rst", "recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst", "recipes/Non-streaming-ASR/librispeech/distillation.rst", "recipes/Non-streaming-ASR/librispeech/index.rst", "recipes/Non-streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Non-streaming-ASR/librispeech/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_ctc_blankskip.rst", "recipes/Non-streaming-ASR/librispeech/zipformer_mmi.rst", "recipes/Non-streaming-ASR/timit/index.rst", "recipes/Non-streaming-ASR/timit/tdnn_ligru_ctc.rst", "recipes/Non-streaming-ASR/timit/tdnn_lstm_ctc.rst", "recipes/Non-streaming-ASR/yesno/index.rst", "recipes/Non-streaming-ASR/yesno/tdnn.rst", "recipes/Streaming-ASR/index.rst", "recipes/Streaming-ASR/introduction.rst", "recipes/Streaming-ASR/librispeech/index.rst", "recipes/Streaming-ASR/librispeech/lstm_pruned_stateless_transducer.rst", "recipes/Streaming-ASR/librispeech/pruned_transducer_stateless.rst", "recipes/Streaming-ASR/librispeech/zipformer_transducer.rst", "recipes/index.rst"], "titles": ["Follow the code style", "Contributing to Documentation", "How to create a recipe", "Contributing", "Frequently Asked Questions (FAQs)", "Huggingface", "Pre-trained models", "Huggingface spaces", "Icefall", "Installation", "Export model.state_dict()", "Export to ncnn", "Export to ONNX", "Export model with torch.jit.script()", "Export model with torch.jit.trace()", "Model export", "Conformer CTC", "aishell", "Stateless Transducer", "TDNN-LSTM CTC", "Non Streaming ASR", "Conformer CTC", "Distillation with HuBERT", "LibriSpeech", "Pruned transducer statelessX", "TDNN-LSTM-CTC", "Zipformer CTC Blank Skip", "Zipformer MMI", "TIMIT", "TDNN-LiGRU-CTC", "TDNN-LSTM-CTC", "YesNo", "TDNN-CTC", "Streaming ASR", "Introduction", "LibriSpeech", "LSTM Transducer", "Pruned transducer statelessX", "Zipformer Transducer", "Recipes"], "terms": {"we": [0, 1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38, 39], "us": [0, 1, 2, 4, 5, 7, 8, 9, 11, 15, 16, 17, 18, 19, 21, 22, 25, 29, 30, 32, 34], "tool": [0, 4, 11, 36], "make": [0, 1, 3, 11, 16, 18, 21, 34, 36], "consist": [0, 18, 24, 36, 37, 38], "possibl": [0, 2, 3, 9, 16, 21], "black": 0, "format": [0, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "flake8": 0, "check": [0, 21], "qualiti": [0, 17], "isort": 0, "sort": [0, 9], "import": [0, 4, 11, 37, 38], "The": [0, 1, 2, 4, 7, 9, 10, 11, 16, 17, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "version": [0, 8, 9, 10, 11, 16, 18, 19, 21, 24, 25, 29, 30, 36, 37], "abov": [0, 9, 10, 11, 16, 17, 18, 19, 21, 24, 26, 27, 32, 34, 36, 37, 38], "ar": [0, 1, 3, 9, 10, 11, 12, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38, 39], "22": [0, 21, 29, 30, 32], "3": [0, 4, 8, 10, 15, 19, 22, 24, 25, 26, 27, 32, 36, 37, 38], "0": [0, 1, 4, 8, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "5": [0, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "4": [0, 4, 10, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "10": [0, 4, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "1": [0, 4, 8, 10, 12, 13, 14, 15, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "after": [0, 1, 7, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "run": [0, 2, 4, 7, 9, 11, 12, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "command": [0, 1, 4, 9, 10, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "git": [0, 9, 10, 11, 16, 18, 19, 21, 25, 29, 30, 32, 36], "clone": [0, 9, 10, 11, 16, 18, 19, 21, 25, 29, 30, 32, 36], "http": [0, 1, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "github": [0, 2, 6, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "com": [0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "k2": [0, 2, 6, 7, 8, 10, 11, 12, 13, 14, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 36, 37, 38], "fsa": [0, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 18, 21, 24, 26, 27, 36, 37, 38], "icefal": [0, 2, 3, 4, 6, 7, 10, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38, 39], "cd": [0, 1, 2, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "pip": [0, 1, 4, 9, 11, 18, 36], "instal": [0, 1, 4, 5, 7, 8, 10, 15, 22, 24, 26, 27, 32, 36, 37, 38], "pre": [0, 3, 5, 7, 8, 9, 15, 22], "commit": 0, "whenev": 0, "you": [0, 1, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "automat": [0, 7, 22], "hook": 0, "invok": 0, "fail": [0, 9], "If": [0, 2, 7, 9, 11, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "ani": [0, 9, 16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37], "your": [0, 1, 2, 5, 7, 8, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "wa": [0, 9, 10, 21, 25], "success": [0, 9], "pleas": [0, 1, 2, 7, 9, 11, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "fix": [0, 4, 9, 11, 21], "issu": [0, 4, 9, 21, 22, 37, 38], "report": [0, 4, 9, 22], "some": [0, 1, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "i": [0, 1, 2, 4, 7, 9, 10, 11, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "e": [0, 2, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "modifi": [0, 15, 16, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "file": [0, 2, 7, 8, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "place": [0, 9, 10, 18, 21, 25], "so": [0, 7, 8, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "statu": 0, "failur": 0, "see": [0, 1, 7, 9, 11, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "which": [0, 2, 7, 10, 11, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 37, 38], "ha": [0, 2, 8, 11, 12, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 34, 36, 37, 38], "been": [0, 11, 12, 18], "befor": [0, 1, 10, 11, 13, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "further": 0, "chang": [0, 4, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "all": [0, 6, 7, 10, 11, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "again": [0, 32], "should": [0, 2, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "succe": 0, "thi": [0, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38, 39], "time": [0, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "succeed": 0, "want": [0, 9, 10, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "can": [0, 1, 2, 6, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "do": [0, 2, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "Or": 0, "without": [0, 5, 7, 16, 21, 36], "your_changed_fil": 0, "py": [0, 2, 4, 9, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "sphinx": 1, "write": [1, 2, 3], "have": [1, 2, 6, 7, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "prepar": [1, 3, 10], "environ": [1, 11, 16, 17, 18, 19, 21, 22, 24, 25, 29, 30, 32, 36, 37, 38], "doc": [1, 10], "r": [1, 9, 11, 29, 30], "requir": [1, 9, 22, 37, 38], "txt": [1, 9, 11, 16, 18, 19, 21, 25, 29, 30, 32], "set": [1, 9, 11, 16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37, 38], "up": [1, 9, 10, 11, 16, 19, 21, 22, 24, 25, 26, 27, 37, 38], "readi": [1, 16, 21, 22], "refer": [1, 2, 9, 10, 11, 13, 14, 16, 18, 19, 21, 24, 25, 26, 29, 30, 32, 34, 37, 38], "restructuredtext": 1, "primer": 1, "familiar": 1, "build": [1, 9, 10, 11, 16, 18, 21, 36], "local": [1, 9, 24, 26, 27, 36, 37, 38], "preview": 1, "what": [1, 2, 9, 11, 18, 34], "look": [1, 2, 6, 9, 11, 16, 18, 19, 21, 22], "like": [1, 2, 7, 9, 11, 16, 18, 19, 21, 24, 26, 27, 32, 34, 36, 37], "publish": [1, 10, 17], "html": [1, 2, 4, 9, 11, 13, 14, 24, 36, 37, 38], "gener": [1, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "view": [1, 11, 16, 18, 19, 21, 24, 26, 27, 32, 36, 37, 38], "follow": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "python3": [1, 4, 9], "m": [1, 11, 18, 24, 26, 27, 29, 30, 36, 37, 38], "server": [1, 7, 9, 36], "It": [1, 2, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "print": [1, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "serv": [1, 24, 26, 27, 36, 37, 38], "port": [1, 22, 24, 26, 27, 36, 37, 38], "8000": [1, 32], "open": [1, 8, 10, 11, 17, 18, 21, 22], "browser": [1, 5, 7, 24, 26, 27, 36, 37, 38], "go": [1, 16, 18, 21, 24, 26, 27, 36, 37, 38], "read": [2, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "code": [2, 3, 4, 8, 11, 16, 21, 22, 24, 25, 29, 30, 32, 37, 38], "style": [2, 3, 8], "adjust": 2, "sytl": 2, "design": 2, "python": [2, 9, 10, 11, 13, 14, 16, 18, 21, 24, 26, 27, 36, 37, 38], "recommend": [2, 9, 16, 18, 19, 21, 22, 24, 37, 38], "test": [2, 8, 10, 12, 15, 16, 18, 19, 21, 22, 25, 26, 29, 30], "valid": [2, 9, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "dataset": [2, 4, 9, 10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "lhots": [2, 8, 10, 11, 16, 18, 21], "readthedoc": [2, 9], "io": [2, 9, 11, 13, 14, 24, 36, 37, 38], "en": [2, 9, 11], "latest": [2, 7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "index": [2, 9, 11, 13, 14, 36, 37, 38], "yesno": [2, 4, 8, 9, 20, 32, 39], "veri": [2, 3, 11, 18, 29, 30, 32, 37, 38], "good": 2, "exampl": [2, 7, 8, 10, 11, 12, 13, 14, 22, 25, 29, 30, 32], "speech": [2, 7, 8, 9, 11, 12, 17, 18, 32, 39], "pull": [2, 11, 16, 18, 21, 34], "380": [2, 11, 30], "show": [2, 7, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "add": [2, 11, 16, 18, 19, 37, 39], "new": [2, 3, 7, 9, 11, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 32, 36, 37, 38], "suppos": [2, 37, 38], "would": [2, 9, 10, 11, 21, 25, 37, 38], "name": [2, 10, 11, 16, 18, 24, 26, 27, 37, 38], "foo": [2, 12, 14, 16, 21, 24, 26, 27, 36, 37, 38], "eg": [2, 4, 6, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "mkdir": [2, 11, 16, 18, 19, 21, 25, 29, 30, 32, 36], "p": [2, 9, 11, 18, 29, 30, 36], "asr": [2, 4, 6, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38, 39], "touch": 2, "sh": [2, 9, 10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "chmod": 2, "x": [2, 11, 34], "simpl": [2, 18], "own": [2, 22, 24, 37, 38], "otherwis": [2, 11, 16, 18, 21, 22, 24, 26, 27, 36, 37, 38], "librispeech": [2, 6, 8, 10, 11, 12, 13, 14, 20, 21, 22, 24, 25, 26, 27, 33, 34, 36, 37, 38, 39], "assum": [2, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 29, 30, 32, 36, 37, 38], "fanci": 2, "call": [2, 4, 22], "bar": [2, 12, 14, 16, 21, 24, 26, 27, 36, 37, 38], "organ": 2, "wai": [2, 3, 15, 24, 26, 27, 34, 36, 37, 38], "readm": [2, 16, 18, 19, 21, 25, 29, 30, 32], "md": [2, 6, 10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "asr_datamodul": [2, 4, 9], "pretrain": [2, 10, 11, 12, 14, 16, 18, 19, 21, 25, 29, 30, 32], "For": [2, 6, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "instanc": [2, 6, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "tdnn": [2, 4, 9, 17, 20, 23, 28, 31], "its": [2, 10, 11, 14, 18, 26], "directori": [2, 8, 9, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "structur": 2, "descript": [2, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "contain": [2, 8, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38, 39], "inform": [2, 10, 16, 18, 19, 21, 24, 25, 26, 29, 30, 32, 34, 36, 37, 38], "g": [2, 9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "wer": [2, 9, 10, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "etc": [2, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "provid": [2, 7, 9, 10, 11, 12, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38, 39], "pytorch": [2, 4, 8, 11, 18], "dataload": [2, 9], "take": [2, 10, 22, 24, 32, 37, 38], "input": [2, 10, 11, 16, 18, 19, 21, 25, 29, 30, 32, 34], "checkpoint": [2, 9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "save": [2, 9, 10, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "dure": [2, 7, 10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "stage": [2, 9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "": [2, 9, 10, 11, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "definit": 2, "neural": [2, 16, 21], "network": [2, 16, 18, 21, 24, 26, 27, 36, 37, 38], "script": [2, 8, 9, 14, 15, 16, 18, 19, 21, 22, 25, 29, 30, 32, 36], "infer": [2, 10, 12], "tdnn_lstm_ctc": [2, 19, 25, 30], "conformer_ctc": [2, 16, 21], "get": [2, 7, 9, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 32, 36, 37, 38], "feel": [2, 22, 36], "result": [2, 6, 7, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "everi": [2, 10, 24, 26, 27, 36, 37, 38], "kept": [2, 24, 37, 38], "self": [2, 11, 34], "toler": 2, "duplic": 2, "among": [2, 9], "differ": [2, 9, 11, 16, 17, 21, 22, 24, 34, 36, 37, 38], "invoc": 2, "help": [2, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "blob": [2, 6, 10, 14, 24, 26, 27, 36, 37, 38], "master": [2, 6, 10, 12, 13, 14, 18, 22, 24, 26, 27, 36, 37, 38], "transform": [2, 16, 21, 36], "conform": [2, 12, 13, 17, 18, 20, 23, 24, 26, 36, 37, 38], "base": [2, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "lstm": [2, 14, 15, 17, 20, 23, 28, 33, 35], "attent": [2, 18, 19, 22, 34, 37, 38], "lm": [2, 9, 18, 24, 25, 29, 30, 32, 37, 38], "rescor": [2, 19, 25, 27, 29, 30, 32], "demonstr": [2, 5, 7, 10], "consid": 2, "colab": 2, "notebook": 2, "welcom": 3, "There": [3, 11, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "mani": [3, 37, 38], "two": [3, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "them": [3, 5, 6, 7, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "To": [3, 7, 9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "document": [3, 8, 10, 11, 27], "repositori": [3, 11], "recip": [3, 6, 8, 9, 10, 16, 18, 19, 21, 22, 24, 25, 29, 30, 32, 34, 36, 37, 38], "In": [3, 4, 7, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 25, 29, 30, 32, 34], "page": [3, 7, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38, 39], "describ": [3, 5, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 24, 25, 29, 30, 37, 38], "how": [3, 5, 7, 8, 9, 11, 15, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "creat": [3, 8, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37], "data": [3, 10, 11, 12, 13, 14, 17], "train": [3, 4, 5, 7, 8, 10, 13, 14, 15, 34], "decod": [3, 4, 7, 11, 12, 14, 15], "model": [3, 5, 7, 8, 9, 22, 34], "section": [4, 5, 9, 10, 12, 13, 14, 15, 16, 21], "collect": [4, 9], "user": 4, "post": 4, "correspond": [4, 6, 7], "solut": 4, "One": 4, "torch": [4, 8, 9, 10, 15, 16, 18, 21], "torchaudio": [4, 8, 34], "cu111": 4, "torchvis": 4, "11": [4, 9, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "f": [4, 9, 29, 30], "download": [4, 7, 8, 15, 17, 22], "org": [4, 9, 17, 18, 24, 36, 37, 38], "whl": [4, 9], "torch_stabl": 4, "throw": [4, 11], "error": [4, 9, 11, 21], "when": [4, 7, 11, 15, 18, 21, 22, 24, 26, 27, 37, 38], "specifi": [4, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "cuda": [4, 9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 36, 37, 38], "while": [4, 9, 11, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "That": [4, 22, 24, 36, 37, 38], "cu11": 4, "therefor": 4, "correct": 4, "log": [4, 9, 11, 25, 29, 30, 32], "traceback": 4, "most": [4, 37, 38], "recent": [4, 11], "last": 4, "line": [4, 9, 11, 24, 37, 38], "14": [4, 9, 10, 11, 12, 13, 16, 21, 24, 25, 26, 29, 36, 37, 38], "from": [4, 5, 7, 9, 10, 11, 16, 17, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "yesnoasrdatamodul": 4, "home": [4, 11, 16, 21], "xxx": [4, 10, 11], "next": [4, 7, 9, 11, 21, 22, 24, 25, 26, 27, 36, 37, 38], "gen": [4, 7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "kaldi": [4, 7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "34": [4, 9], "datamodul": 4, "__init__": [4, 10, 11, 16, 18, 21], "23": [4, 9, 11, 16, 18, 19, 21, 29, 30, 32], "util": [4, 9, 21], "add_eo": 4, "add_so": 4, "get_text": 4, "39": [4, 9, 11, 18, 21, 25, 29], "tensorboard": [4, 9, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "summarywrit": 4, "miniconda3": 4, "env": 4, "yyi": 4, "lib": [4, 9], "8": [4, 9, 10, 11, 16, 18, 21, 22, 24, 25, 26, 27, 32, 36, 37, 38], "site": [4, 9], "packag": [4, 9], "loosevers": 4, "uninstal": 4, "setuptool": [4, 9], "58": [4, 21], "find": [5, 6, 7, 9, 10, 11, 14, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "also": [5, 6, 9, 10, 11, 12, 14, 16, 18, 19, 21, 24, 26, 27, 32, 34, 36, 37, 38], "try": [5, 7, 22, 24, 26, 27, 36, 37, 38], "within": [5, 7], "anyth": [5, 7], "space": [5, 8], "youtub": [5, 8, 21, 22, 24, 25, 26, 27, 36, 37, 38], "video": [5, 8, 21, 22, 24, 25, 26, 27, 36, 37, 38], "upload": [6, 7, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "huggingfac": [6, 8, 10, 11, 16, 18, 19, 21, 25, 26, 27, 29, 30, 32, 36], "co": [6, 7, 10, 11, 16, 17, 18, 19, 21, 25, 26, 27, 29, 30, 32, 36], "visit": [6, 7, 24, 26, 27, 36, 37, 38], "link": [6, 9, 10, 11, 24, 26, 27, 36, 37, 38], "search": [6, 7], "specif": [6, 18], "aishel": [6, 8, 16, 18, 19, 20, 39], "gigaspeech": [6, 13, 36], "wenetspeech": [6, 13], "integr": 7, "framework": [7, 12, 24, 37], "sherpa": [7, 12, 13, 14, 15, 36], "need": [7, 9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "window": [7, 11, 12, 36], "maco": [7, 11, 12, 36], "linux": [7, 11, 12, 36], "even": [7, 9], "ipad": 7, "phone": 7, "start": [7, 9, 10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "address": [7, 10, 11, 18, 24, 27, 36, 37, 38], "recognit": [7, 8, 11, 12, 17, 18, 32, 39], "screenshot": [7, 16, 18, 19, 21, 22, 24, 32, 36, 37], "select": [7, 11, 24, 25, 29, 30, 32, 36, 37, 38], "languag": [7, 16, 18, 19], "current": [7, 9, 18, 22, 34, 36, 37, 38, 39], "chines": [7, 17, 18], "english": [7, 32, 36], "target": 7, "method": [7, 9, 10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 36, 37, 38], "greedi": 7, "modified_beam_search": [7, 18, 22, 24, 26, 36, 37, 38], "choos": [7, 9, 22, 24, 26, 27, 36, 37, 38], "number": [7, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "activ": 7, "path": [7, 10, 11, 12, 14, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "either": [7, 16, 18, 19, 21, 37, 38], "record": [7, 16, 17, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "click": [7, 9, 16, 18, 19, 21, 24, 26, 27, 32, 36, 37], "button": 7, "submit": 7, "wait": 7, "moment": 7, "an": [7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 21, 22, 24, 27, 32, 36, 37, 38], "bottom": [7, 24, 26, 27, 36, 37, 38], "part": [7, 9, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "tabl": [7, 11], "one": [7, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "subscrib": [7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "channel": [7, 9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "nadira": [7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "povei": [7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "www": [7, 9, 17, 21, 22, 24, 25, 26, 27, 36, 37, 38], "uc_vaumpkminz1pnkfxan9mw": [7, 9, 21, 22, 24, 25, 26, 27, 36, 37, 38], "2": [8, 10, 12, 15, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "frequent": 8, "ask": 8, "question": 8, "faq": 8, "oserror": 8, "libtorch_hip": 8, "cannot": [8, 11], "share": [8, 9], "object": [8, 9, 16, 18, 19, 24, 32, 36, 37], "attributeerror": 8, "modul": [8, 9, 11, 26, 37], "distutil": 8, "attribut": [8, 21], "export": [8, 9, 16, 18, 19, 21, 22, 25, 29, 30, 32], "state_dict": [8, 15, 16, 18, 19, 21, 25, 29, 30, 32], "jit": [8, 15, 21], "trace": [8, 13, 15], "onnx": [8, 10, 15], "ncnn": [8, 15], "non": [8, 12, 21, 34, 37, 39], "stream": [8, 11, 12, 16, 21, 29, 30, 36, 39], "timit": [8, 20, 29, 30, 39], "introduct": [8, 33, 39], "contribut": 8, "depend": [9, 16, 21, 36], "step": [9, 10, 11, 16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37, 38], "order": [9, 11, 16, 19, 21, 25, 29, 30], "matter": [9, 11], "least": 9, "v1": [9, 16, 19, 21, 25, 29, 30], "9": [9, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 32, 36, 37, 38], "alreadi": [9, 10], "don": [9, 11, 13, 16, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "t": [9, 11, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "replac": 9, "compil": [9, 11, 16, 18, 21], "against": 9, "strongli": 9, "variabl": [9, 11, 16, 19, 21, 22, 24, 26, 27, 36, 37, 38], "pythonpath": [9, 11, 36], "point": [9, 10, 16, 19, 21, 22, 24, 26, 27, 36, 37, 38], "folder": [9, 10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "tmp": [9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "setup": [9, 11, 16, 18, 19, 21, 22, 24, 25, 29, 30, 32, 36, 37, 38], "put": [9, 11, 26, 37], "sever": [9, 10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "same": [9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "switch": [9, 16, 21, 27], "just": [9, 11, 34], "about": [9, 11, 18, 22, 24, 27, 36, 37, 38], "virtualenv": 9, "cpython3": 9, "6": [9, 15, 16, 18, 21, 24, 25, 29, 30, 36], "final": [9, 10, 21, 25], "64": [9, 10, 18, 37], "1540m": 9, "creator": 9, "cpython3posix": 9, "dest": 9, "ceph": [9, 10, 16, 18, 21], "fj": [9, 10, 11, 18, 21], "fangjun": [9, 10, 11, 18, 21], "clear": 9, "fals": [9, 10, 11, 16, 18, 21, 22], "no_vcs_ignor": 9, "global": 9, "seeder": 9, "fromappdata": 9, "bundl": 9, "wheel": [9, 11, 36], "via": [9, 13, 14, 15], "copi": [9, 34], "app_data_dir": 9, "root": [9, 11], "v": [9, 11, 21, 29, 30], "irtualenv": 9, "ad": [9, 11, 16, 18, 19, 21, 24, 26, 27, 32, 36, 37, 38], "seed": 9, "21": [9, 10, 16, 18, 21, 29, 30], "57": [9, 21, 25], "36": [9, 11, 18, 21, 22], "bashactiv": 9, "cshellactiv": 9, "fishactiv": 9, "powershellactiv": 9, "pythonactiv": 9, "xonshactiv": 9, "sourc": [9, 10, 11, 16, 17, 18, 21], "bin": [9, 11, 16, 21, 36], "dev20210822": 9, "cpu": [9, 10, 11, 13, 16, 24, 26, 27, 32, 37, 38], "torch1": 9, "nightli": 9, "2bcpu": 9, "cp38": 9, "linux_x86_64": 9, "mb": [9, 11], "________________________________": 9, "185": [9, 16, 21, 32], "kb": [9, 11, 29, 30], "graphviz": 9, "17": [9, 10, 11, 16, 21, 29, 30, 36], "py3": 9, "none": [9, 16, 21], "18": [9, 11, 16, 18, 19, 21, 24, 25, 29, 30, 36, 37, 38], "cach": 9, "manylinux1_x86_64": 9, "831": [9, 18, 30], "type": [9, 10, 11, 16, 18, 21, 24, 26, 27, 32, 34, 36, 37, 38], "extens": 9, "typing_extens": 9, "26": [9, 18, 21, 30], "successfulli": [9, 11], "probabl": [9, 18, 24, 26, 36, 37, 38], "req": 9, "7b1b76ge": 9, "q": 9, "audioread": 9, "soundfil": 9, "post1": 9, "py2": 9, "7": [9, 10, 11, 16, 19, 21, 24, 25, 29, 30, 36, 37], "97": [9, 16], "cytoolz": 9, "manylinux_2_17_x86_64": 9, "manylinux2014_x86_64": 9, "dataclass": 9, "h5py": 9, "manylinux_2_12_x86_64": 9, "manylinux2010_x86_64": 9, "684": [9, 16, 32], "intervaltre": 9, "lilcom": 9, "numpi": 9, "15": [9, 10, 11, 18, 19, 21, 29, 32], "40": [9, 11, 19, 21, 25, 29, 30], "pyyaml": 9, "662": 9, "tqdm": 9, "62": [9, 21, 25], "76": [9, 32], "73": 9, "satisfi": 9, "dev": [9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "2a1410b": 9, "clean": [9, 16, 18, 21, 22, 24, 25, 26, 27, 36, 37, 38], "toolz": 9, "55": [9, 19, 21, 29], "sortedcontain": 9, "29": [9, 16, 18, 19, 21, 25, 29, 30], "cffi": 9, "411": [9, 21], "pycpars": 9, "20": [9, 10, 16, 18, 19, 21, 24, 25, 29, 30, 32, 37], "112": [9, 11], "pypars": 9, "67": 9, "done": [9, 10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "filenam": [9, 11, 12, 13, 14, 26, 27, 36, 38], "dev_2a1410b_clean": 9, "size": [9, 10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "342242": 9, "sha256": 9, "f683444afa4dc0881133206b4646a": 9, "9d0f774224cc84000f55d0a67f6e4a37997": 9, "store": [9, 21], "ephem": 9, "ftu0qysz": 9, "7f": 9, "7a": 9, "8e": 9, "a0bf241336e2e3cb573e1e21e5600952d49f5162454f2e612f": 9, "warn": 9, "built": 9, "invalid": [9, 21], "metadata": [9, 29, 30], "mandat": 9, "pep": 9, "440": 9, "packa": 9, "ging": 9, "deprec": [9, 18], "legaci": 9, "becaus": 9, "could": [9, 11, 16, 19], "A": [9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 36, 37, 38], "discuss": 9, "regard": 9, "pypa": 9, "sue": 9, "8368": 9, "inter": 9, "valtre": 9, "sor": 9, "tedcontain": 9, "remot": 9, "enumer": 9, "500": [9, 10, 11, 18, 21, 27, 36], "count": 9, "100": [9, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "compress": 9, "308": [9, 16, 18, 19], "total": [9, 16, 18, 19, 21, 22, 24, 25, 32, 36, 37], "delta": 9, "263": 9, "reus": 9, "307": 9, "102": [9, 16], "pack": [9, 37, 38], "receiv": 9, "172": 9, "49": [9, 21, 30, 32], "kib": 9, "385": 9, "00": [9, 11, 16, 18, 19, 21, 25, 29, 30, 32], "resolv": 9, "kaldilm": 9, "tar": 9, "gz": 9, "48": [9, 16, 18], "574": 9, "kaldialign": 9, "sentencepiec": [9, 21], "96": 9, "41": [9, 11, 16, 18, 29, 32], "absl": 9, "absl_pi": 9, "13": [9, 10, 11, 18, 19, 21, 25, 26, 29], "132": 9, "googl": [9, 24, 26, 27, 36, 37, 38], "auth": 9, "oauthlib": 9, "google_auth_oauthlib": 9, "grpcio": 9, "24": [9, 19, 25, 29, 30, 32], "ment": 9, "12": [9, 10, 11, 16, 18, 19, 21, 24, 26, 27, 29, 32, 36, 37, 38], "requi": 9, "rement": 9, "protobuf": 9, "manylinux_2_5_x86_64": 9, "werkzeug": 9, "288": 9, "tensorboard_data_serv": 9, "google_auth": 9, "35": [9, 10, 18, 21, 36], "152": 9, "request": [9, 34], "plugin": 9, "wit": 9, "tensorboard_plugin_wit": 9, "781": 9, "markdown": 9, "six": 9, "16": [9, 10, 14, 16, 18, 19, 21, 24, 25, 29, 30, 32, 36, 37, 38], "cachetool": 9, "rsa": 9, "pyasn1": 9, "pyasn1_modul": 9, "155": 9, "requests_oauthlib": 9, "77": [9, 21], "urllib3": 9, "27": [9, 11, 16, 18, 25, 30], "138": [9, 16, 18], "certifi": 9, "2017": 9, "2021": [9, 16, 19, 21, 25, 29, 30, 32], "30": [9, 11, 16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37, 38], "145": 9, "charset": 9, "normal": [9, 25, 29, 30, 32, 37], "charset_norm": 9, "idna": 9, "59": [9, 11, 19, 21], "146": 9, "897233": 9, "eccb906cafcd45bf9a7e1a1718e4534254bfb": 9, "f4c0d0cbc66eee6c88d68a63862": 9, "85": 9, "7d": 9, "63": [9, 18], "f2dd586369b8797cb36d213bf3a84a789eeb92db93d2e723c9": 9, "etool": 9, "oaut": 9, "hlib": 9, "let": [9, 11, 16, 21, 36], "u": [9, 11, 16, 18, 19, 21, 22, 32, 36], "08": [9, 21, 25, 27, 29, 30, 32, 36], "19": [9, 10, 16, 21, 25, 29, 30], "main": [9, 16, 21, 34], "dl_dir": [9, 16, 19, 21, 22, 24, 26, 27, 36, 37, 38], "waves_yesno": 9, "49mb": 9, "03": [9, 10, 18, 21, 29, 30, 36], "39mb": 9, "manifest": [9, 22], "31": [9, 11, 21], "42": [9, 16, 21, 32], "comput": [9, 10, 11, 16, 18, 19, 22, 24, 25, 27, 29, 30, 32, 36, 37, 38], "fbank": [9, 10, 11, 16, 18, 19, 21, 25, 29, 30, 32], "32": [9, 11, 16, 18, 19, 38], "803": 9, "info": [9, 10, 11, 16, 18, 19, 21, 25, 29, 30, 32], "compute_fbank_yesno": 9, "52": [9, 16, 21], "process": [9, 10, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "extract": [9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "featur": [9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "_______________________________________________________________": 9, "90": 9, "01": [9, 11, 18, 19, 21, 22], "80": [9, 10, 11, 16, 18, 21], "57it": 9, "085": 9, "______________________________________________________________": 9, "248": [9, 18], "21it": 9, "lang": [9, 10, 18, 21, 27], "fcordre9": 9, "kaldilm_6899d26f2d684ad48f21025950cd2866": 9, "csrc": [9, 21], "arpa_file_pars": 9, "cc": 9, "void": 9, "arpafilepars": 9, "rea": 9, "d": [9, 29, 30], "std": 9, "istream": 9, "79": 9, "140": [9, 19], "gram": [9, 16, 18, 19, 24, 25, 27, 29, 30, 37, 38], "89": [9, 16], "hlg": [9, 25, 29, 30, 32], "928": 9, "compile_hlg": 9, "120": 9, "lang_phon": [9, 19, 25, 29, 30, 32], "929": [9, 18], "lexicon": [9, 16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37, 38], "116": 9, "convert": [9, 11, 21, 36], "l": [9, 11, 18, 29, 30, 32], "pt": [9, 10, 11, 13, 14, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "linv": [9, 18, 21, 32], "931": 9, "ctc_topo": 9, "max_token_id": 9, "932": 9, "load": [9, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "fst": [9, 18, 32], "intersect": [9, 24, 37, 38], "933": 9, "lg": [9, 24, 27, 37, 38], "shape": 9, "66": 9, "connect": [9, 10, 21, 24, 25, 36, 37, 38], "68": [9, 21], "70": 9, "class": [9, 21], "tensor": [9, 16, 18, 19, 21, 24, 32, 36, 37], "71": [9, 21, 25], "determin": 9, "934": 9, "74": [9, 10], "_k2": 9, "raggedint": 9, "remov": [9, 16, 18, 19, 21, 25, 29, 30], "disambigu": 9, "symbol": [9, 18, 24, 37, 38], "87": 9, "remove_epsilon": 9, "935": 9, "92": [9, 21], "arc": 9, "95": [9, 17], "compos": 9, "h": 9, "105": [9, 21], "936": 9, "107": [9, 25], "123": 9, "now": [9, 11, 16, 21, 22, 24, 25, 26, 27, 29, 30, 36, 37, 38], "cuda_visible_devic": [9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "gpu": [9, 16, 18, 19, 21, 22, 24, 26, 27, 29, 30, 32, 36, 37, 38], "avail": [9, 10, 11, 16, 18, 21, 25, 29, 30, 32, 36], "case": [9, 10, 11, 24, 26, 27, 36, 37, 38], "segment": 9, "fault": 9, "core": 9, "dump": 9, "protocol_buffers_python_implement": 9, "more": [9, 11, 16, 21, 22, 32, 34, 36, 37], "674": 9, "interest": [9, 22, 24, 26, 27, 36, 37, 38], "given": [9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 37, 38], "below": [9, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37], "072": 9, "465": 9, "466": 9, "exp_dir": [9, 11, 18, 21, 22, 24, 26, 27, 37, 38], "posixpath": [9, 11, 18, 21], "exp": [9, 10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "lang_dir": [9, 18, 21], "lr": [9, 18, 36], "feature_dim": [9, 10, 11, 16, 18, 21, 32], "weight_decai": 9, "1e": 9, "06": [9, 10, 11, 19, 21, 25, 32], "start_epoch": 9, "best_train_loss": [9, 10, 11], "inf": [9, 10, 11], "best_valid_loss": [9, 10, 11], "best_train_epoch": [9, 10, 11], "best_valid_epoch": [9, 10], "batch_idx_train": [9, 10, 11], "log_interv": [9, 10, 11], "valid_interv": [9, 10, 11], "beam_siz": [9, 10, 18], "reduct": [9, 26], "sum": 9, "use_doub": 9, "le_scor": 9, "true": [9, 10, 11, 16, 18, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "world_siz": [9, 22], "master_port": 9, "12354": 9, "num_epoch": 9, "feature_dir": [9, 21], "max_dur": [9, 21], "bucketing_sampl": [9, 21], "num_bucket": [9, 21], "concatenate_cut": [9, 21], "duration_factor": [9, 21], "gap": [9, 21], "on_the_fly_feat": [9, 21], "shuffl": [9, 21], "return_cut": [9, 21], "num_work": [9, 21], "074": 9, "113": [9, 18, 21], "098": [9, 25], "cut": [9, 21], "240": [9, 16, 32], "149": [9, 11, 21], "200": [9, 10, 11, 16, 21, 22, 29, 30, 32], "singlecutsampl": 9, "206": [9, 21], "219": [9, 18, 21], "246": [9, 18, 21, 29, 30], "357": 9, "416": 9, "epoch": [9, 10, 11, 12, 13, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "batch": [9, 11, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "avg": [9, 10, 11, 12, 13, 14, 18, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "loss": [9, 16, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "0789": 9, "848": 9, "5356": 9, "7556": 9, "301": [9, 10, 21], "432": [9, 21], "9972": 9, "best": [9, 11, 16, 19, 21], "805": 9, "2436": 9, "5717": 9, "33": [9, 16, 17, 18, 21, 29], "109": [9, 16, 21], "4167": 9, "121": [9, 25], "325": 9, "2214": 9, "798": [9, 18], "0781": 9, "1343": 9, "065": 9, "0859": 9, "556": 9, "0421": 9, "0975": 9, "810": 9, "0431": 9, "824": 9, "657": 9, "0109": 9, "984": [9, 21], "0093": 9, "0096": 9, "50": [9, 10, 11, 21, 24, 29, 36, 37, 38], "239": [9, 18], "0104": 9, "0101": 9, "569": 9, "0092": 9, "819": [9, 29], "835": 9, "51": [9, 11, 16, 21, 32], "024": 9, "0105": 9, "317": 9, "0099": 9, "0097": 9, "552": 9, "0108": 9, "869": 9, "0102": 9, "126": [9, 21], "128": [9, 21], "537": [9, 21], "192": [9, 21], "249": 9, "250": [9, 18, 25], "lm_dir": [9, 21], "search_beam": [9, 16, 21, 32], "output_beam": [9, 16, 21, 32], "min_active_st": [9, 16, 21, 32], "max_active_st": [9, 16, 21, 32], "10000": [9, 16, 21, 32], "use_double_scor": [9, 16, 21, 32], "193": 9, "213": [9, 32], "259": [9, 16], "devic": [9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 37, 38], "217": [9, 16, 21], "279": [9, 21], "averag": [9, 10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "userwarn": [9, 18], "floor_divid": 9, "futur": [9, 18, 39], "round": [9, 18], "toward": [9, 18], "trunc": [9, 18], "function": [9, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "NOT": [9, 16, 18, 21, 32], "floor": [9, 18], "incorrect": [9, 18], "neg": [9, 18], "valu": [9, 11, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "keep": [9, 18, 24, 37, 38], "behavior": [9, 18], "div": [9, 18], "b": [9, 18, 21, 29, 30], "rounding_mod": [9, 18], "actual": [9, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "divis": [9, 18], "trigger": 9, "intern": 9, "aten": 9, "src": [9, 11, 36], "nativ": 9, "binaryop": 9, "cpp": [9, 11, 13], "450": [9, 16, 18, 19], "k": [9, 24, 29, 30, 36, 37, 38], "n": [9, 16, 22, 24, 26, 27, 29, 30, 36, 37, 38], "220": [9, 11, 18, 19, 21], "409": 9, "190": [9, 25], "until": [9, 21, 26], "571": [9, 21], "228": [9, 21], "transcript": [9, 16, 17, 18, 19, 21, 24, 25, 29, 30, 36, 37, 38], "recog": [9, 18, 21], "test_set": [9, 32], "572": 9, "ins": [9, 21, 32], "del": [9, 21, 32], "sub": [9, 21, 32], "573": 9, "236": 9, "wrote": [9, 21], "detail": [9, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "stat": [9, 21], "err": [9, 18, 21], "299": 9, "congratul": [9, 11, 16, 19, 21, 25, 29, 30, 32], "first": [9, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "fun": [9, 11], "debug": 9, "variou": [9, 15, 39], "problem": [9, 22], "mai": [9, 11, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38, 39], "encount": [9, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "period": [10, 11], "disk": 10, "optim": [10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "other": [10, 18, 21, 22, 24, 25, 29, 30, 32, 34, 37, 38, 39], "relat": [10, 16, 18, 21, 25, 29, 30, 32], "resum": [10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "howev": [10, 22], "onli": [10, 11, 12, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38, 39], "strip": 10, "except": 10, "reduc": [10, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "each": [10, 16, 18, 19, 21, 24, 26, 27, 34, 36, 37, 38], "well": [10, 32, 39], "usag": [10, 11, 13, 14, 25, 29, 30, 32], "pruned_transducer_stateless3": [10, 12, 13, 34], "almost": [10, 24, 34, 37, 38], "dir": [10, 11, 12, 13, 14, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "bpe": [10, 11, 12, 13, 14, 21, 24, 26, 27, 36, 37, 38], "lang_bpe_500": [10, 11, 12, 13, 14, 21, 24, 26, 27, 36, 37, 38], "dict": 10, "host": 10, "csukuangfj": [10, 11, 16, 18, 19, 21, 25, 29, 30, 32, 36], "prune": [10, 18, 20, 23, 33, 34, 35, 36, 38], "transduc": [10, 12, 15, 17, 20, 23, 33, 34, 35], "stateless3": [10, 11], "2022": [10, 11, 18, 24, 26, 27, 36, 37], "05": [10, 11, 16, 18, 19, 21, 30], "lf": [10, 11, 16, 18, 19, 21, 25, 27, 29, 30, 32], "repo": 10, "prefix": 10, "those": 10, "wave": [10, 11, 12, 16, 21], "iter": [10, 11, 14, 24, 26, 27, 36, 37, 38], "1224000": 10, "greedy_search": [10, 18, 24, 26, 36, 37, 38], "test_wav": [10, 11, 16, 18, 19, 21, 25, 29, 30, 32], "1089": [10, 11, 21, 25], "134686": [10, 11, 21, 25], "0001": [10, 11, 21, 25], "wav": [10, 11, 12, 14, 16, 18, 19, 21, 24, 26, 27, 29, 30, 32, 36, 37, 38], "1221": [10, 21, 25], "135766": [10, 21, 25], "0002": [10, 21, 25], "multipl": [10, 16, 18, 19, 21, 25, 29, 30, 32], "sound": [10, 11, 14, 16, 18, 19, 21, 25, 29, 30, 32], "Its": [10, 11, 21], "output": [10, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "09": [10, 16, 18, 19, 21, 36], "02": [10, 11, 18, 21, 24, 30, 36, 37], "233": 10, "265": 10, "reset_interv": [10, 11], "3000": [10, 11], "subsampling_factor": [10, 16, 18, 21], "encoder_dim": [10, 11], "512": [10, 11, 16, 18, 21], "nhead": [10, 11, 16, 18, 21, 24, 37], "dim_feedforward": [10, 11, 18], "2048": [10, 11, 18], "num_encoder_lay": [10, 11, 18], "decoder_dim": [10, 11], "joiner_dim": [10, 11], "model_warm_step": [10, 11], "env_info": [10, 11, 16, 18, 21], "releas": [10, 11, 16, 18, 21, 36], "sha1": [10, 11, 16, 18, 21], "4810e00d8738f1a21278b0156a42ff396a2d40ac": 10, "date": [10, 11, 16, 18, 21], "fri": 10, "oct": [10, 21], "miss": [10, 11, 18, 21], "cu102": [10, 11], "branch": [10, 11, 16, 18, 21, 26], "1013": 10, "c39cba5": 10, "dirti": [10, 11, 16, 21], "thu": [10, 11, 18, 21, 25], "jsonl": 10, "hostnam": [10, 11, 18], "de": [10, 11, 18], "74279": [10, 11, 18], "0324160024": 10, "65bfd8b584": 10, "jjlbn": 10, "ip": [10, 11, 18], "177": [10, 18, 19, 21], "203": [10, 21], "bpe_model": [10, 11, 21], "sound_fil": [10, 16, 18, 21, 32], "sample_r": [10, 16, 18, 21, 32], "16000": [10, 16, 18, 19, 21, 25, 26, 29, 30], "beam": [10, 36], "max_context": 10, "max_stat": 10, "context_s": [10, 11, 18], "max_sym_per_fram": [10, 18], "simulate_stream": 10, "decode_chunk_s": 10, "left_context": 10, "dynamic_chunk_train": 10, "causal_convolut": 10, "short_chunk_s": [10, 37, 38], "25": [10, 16, 21, 24, 29, 30, 32, 37], "num_left_chunk": 10, "blank_id": [10, 11, 18], "unk_id": 10, "vocab_s": [10, 11, 18], "271": 10, "273": [10, 18], "612": 10, "458": 10, "disabl": [10, 11], "giga": [10, 36], "623": 10, "277": 10, "paramet": [10, 11, 13, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 36, 37, 38], "78648040": 10, "951": [10, 21], "285": [10, 18, 21], "construct": [10, 11, 16, 18, 19, 21, 25, 29, 30, 32], "952": 10, "295": [10, 16, 18, 19, 21], "957": 10, "700": 10, "329": [10, 21], "912": 10, "388": 10, "earli": [10, 11, 21, 25], "nightfal": [10, 11, 21, 25], "THE": [10, 11, 21, 25], "yellow": [10, 11, 21, 25], "lamp": [10, 11, 21, 25], "light": [10, 11, 21, 25], "here": [10, 11, 16, 18, 19, 21, 22, 25, 34, 36, 37], "AND": [10, 11, 21, 25], "THERE": [10, 11, 21, 25], "squalid": [10, 11, 21, 25], "quarter": [10, 11, 21, 25], "OF": [10, 11, 21, 25], "brothel": [10, 11, 21, 25], "god": [10, 21, 25], "AS": [10, 21, 25], "direct": [10, 21, 25], "consequ": [10, 21, 25], "sin": [10, 21, 25], "man": [10, 21, 25], "punish": [10, 21, 25], "had": [10, 21, 25], "her": [10, 21, 25], "love": [10, 21, 25], "child": [10, 21, 25], "whose": [10, 18, 21, 25], "ON": [10, 11, 21, 25, 36], "THAT": [10, 21, 25], "dishonor": [10, 21, 25], "bosom": [10, 21, 25], "TO": [10, 21, 25], "parent": [10, 21, 25], "forev": [10, 21, 25], "WITH": [10, 21, 25], "race": [10, 21, 25], "descent": [10, 21, 25], "mortal": [10, 21, 25], "BE": [10, 21, 25], "bless": [10, 21, 25], "soul": [10, 21, 25], "IN": [10, 21, 25], "heaven": [10, 21, 25], "yet": [10, 11, 21, 25], "THESE": [10, 21, 25], "thought": [10, 21, 25], "affect": [10, 21, 25], "hester": [10, 21, 25], "prynn": [10, 21, 25], "less": [10, 21, 25, 32, 37, 38], "hope": [10, 17, 21, 25], "than": [10, 16, 18, 19, 21, 24, 25, 26, 27, 32, 36, 37, 38], "apprehens": [10, 21, 25], "390": 10, "alwai": 10, "note": [10, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "down": [10, 16, 21, 24, 26, 27, 36, 37, 38], "reproduc": [10, 21], "ln": [10, 11, 16, 21, 24, 26, 27, 36, 37, 38], "9999": [10, 26, 27, 36], "symlink": 10, "pass": [10, 16, 18, 19, 21, 24, 26, 27, 34, 36, 37, 38], "max": [10, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "durat": [10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "600": [10, 21, 24, 26, 36, 37, 38], "reason": [10, 11, 37], "support": [11, 12, 16, 18, 21, 24, 26, 27, 34, 36, 37, 38], "both": [11, 24, 26, 27, 36, 37, 38], "perform": [11, 12, 18, 22, 37], "android": 11, "raspberri": [11, 36], "pi": [11, 36], "static": [11, 36], "produc": [11, 24, 26, 27, 36, 37, 38], "binari": [11, 16, 18, 19, 21, 24, 32, 36, 37], "everyth": 11, "zengwei": [11, 27, 36], "conv": 11, "emform": [11, 13], "stateless2": [11, 36], "07": [11, 16, 18, 19, 21], "ubuntu": 11, "04": [11, 16, 18, 19, 21, 25, 29, 30], "work": [11, 21], "pretrained_model": 11, "online_transduc": 11, "continu": [11, 16, 18, 19, 21, 24, 26, 27, 32, 36, 37], "git_lfs_skip_smudg": 11, "includ": [11, 24, 26, 27, 36, 37, 38], "jit_xxx": 11, "anywher": 11, "submodul": [11, 36], "updat": [11, 36], "recurs": [11, 36], "init": [11, 36], "cmake": [11, 16, 21, 36], "dcmake_build_typ": [11, 16, 21, 36], "dncnn_python": [11, 36], "dncnn_build_benchmark": [11, 36], "off": [11, 36], "dncnn_build_exampl": [11, 36], "dncnn_build_tool": [11, 36], "j4": [11, 36], "pwd": [11, 36], "compon": [11, 34], "pnxx": 11, "execut": [11, 16, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "locat": 11, "ncnn2int8": [11, 36], "our": [11, 13, 14, 21, 22, 24, 34, 37, 38], "cpython": 11, "38": [11, 16, 18, 21, 29], "x86_64": 11, "gnu": 11, "am": 11, "sai": [11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "But": [11, 24, 26, 27, 36, 37, 38], "doe": [11, 16, 18, 21, 32], "As": [11, 18, 21, 22], "long": 11, "later": [11, 16, 19, 21, 24, 25, 26, 27, 29, 30, 36, 37, 38], "termin": 11, "tencent": 11, "made": 11, "modif": [11, 18], "offic": 11, "synchron": 11, "offici": 11, "renam": 11, "conv_emformer_transducer_stateless2": [11, 34], "num": [11, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "layer": [11, 18, 22, 24, 34, 36, 37, 38], "chunk": [11, 37, 38], "length": [11, 18, 37, 38], "cnn": 11, "kernel": [11, 18], "left": [11, 18, 37, 38], "context": [11, 18, 24, 34, 36, 37, 38], "right": [11, 18, 34, 37], "memori": [11, 16, 18, 21, 34], "dim": [11, 18, 24, 37], "yourself": [11, 22, 37, 38], "tune": [11, 16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "combin": 11, "2023": 11, "677": 11, "681": 11, "229": [11, 16], "best_v": 11, "alid_epoch": 11, "subsampl": [11, 37, 38], "ing_factor": 11, "a34171ed85605b0926eebbd0463d059431f4f74a": 11, "wed": [11, 16, 18, 21], "dec": 11, "ver": 11, "ion": 11, "530e8a1": 11, "tue": [11, 21], "star": 11, "op": 11, "1220120619": 11, "7695ff496b": 11, "s9n4w": 11, "127": [11, 32], "icefa": 11, "ll": 11, "transdu": 11, "cer": 11, "use_averaged_model": 11, "cnn_module_kernel": 11, "left_context_length": 11, "chunk_length": 11, "right_context_length": 11, "memory_s": 11, "231": 11, "053": 11, "022": 11, "708": [11, 16, 18, 21, 32], "315": [11, 16, 18, 19, 21, 25], "75490012": 11, "318": 11, "320": [11, 18], "682": 11, "75": 11, "lh": 11, "rw": 11, "kuangfangjun": 11, "289m": 11, "jan": 11, "289": 11, "roughli": 11, "1010k": 11, "decoder_jit_trac": [11, 14, 36, 38], "283m": 11, "encoder_jit_trac": [11, 14, 36, 38], "0m": 11, "joiner_jit_trac": [11, 14, 36, 38], "sure": 11, "found": [11, 16, 18, 19, 21, 24, 26, 27, 32, 36, 37], "param": [11, 36], "503k": 11, "437": 11, "142m": 11, "79k": 11, "5m": 11, "488": 11, "text": [11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "architectur": [11, 36], "editor": 11, "content": 11, "compar": [11, 37], "283": 11, "1010": 11, "142": [11, 16, 19, 21], "503": 11, "half": [11, 24, 37, 38], "joiner": [11, 12, 14, 18, 24, 36, 37, 38], "default": [11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "float32": 11, "float16": 11, "occupi": 11, "byte": 11, "twice": 11, "smaller": [11, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "convers": 11, "fp16": [11, 24, 26, 27, 36, 37, 38], "won": [11, 16, 19, 21, 22, 24, 26, 27, 36, 37, 38], "token": [11, 16, 18, 19, 21, 25, 29, 30, 32], "accept": 11, "216": [11, 16, 21, 29, 30], "encoder_param_filenam": 11, "encoder_bin_filenam": 11, "decoder_param_filenam": 11, "decoder_bin_filenam": 11, "joiner_param_filenam": 11, "joiner_bin_filenam": 11, "sound_filenam": 11, "141": 11, "328": 11, "151": 11, "331": [11, 21, 25], "176": [11, 18, 21], "336": 11, "106000": [11, 21, 25], "581": [11, 25], "381": 11, "few": [11, 22], "7767517": 11, "1060": 11, "1342": 11, "in0": 11, "explan": 11, "three": [11, 14, 16, 18, 34], "magic": 11, "intermedi": 11, "mean": [11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 34, 36, 37, 38], "extra": [11, 18, 34, 37], "1061": 11, "sherpametadata": 11, "sherpa_meta_data1": 11, "still": 11, "sinc": [11, 22, 32, 36], "newli": 11, "must": [11, 37], "kei": [11, 21], "eas": 11, "list": [11, 16, 18, 19, 21, 25, 29, 30], "pair": 11, "accordingli": 11, "sad": 11, "No": [11, 32], "rememb": 11, "anymor": 11, "flexibl": 11, "edit": 11, "arm": 11, "aarch64": 11, "onc": 11, "mayb": 11, "year": 11, "todo": 11, "finish": [11, 16, 18, 19, 21, 22, 24, 25, 29, 30, 32, 37, 38], "tree": [12, 13, 14, 16, 18, 19, 21, 25, 29, 30, 32, 36], "insid": [12, 14], "encod": [12, 14, 15, 16, 18, 19, 21, 24, 25, 26, 32, 34, 36, 37, 38], "joiner_encoder_proj": 12, "joiner_decoder_proj": 12, "onnx_pretrain": 12, "proj": 12, "baz": [12, 14], "onnxruntim": 12, "torchscript": [13, 14, 15, 36], "cpu_jit": [13, 16, 21, 24, 26, 27, 37, 38], "confus": 13, "move": [13, 24, 26, 27, 37, 38], "why": 13, "streaming_asr": [13, 14, 36, 37, 38], "conv_emform": 13, "offline_asr": [13, 24], "lstm_transducer_stateless2": [14, 36], "468000": [14, 36], "jit_pretrain": [14, 26, 27, 36], "convemform": [15, 34], "pnnx": [15, 36], "option": [15, 18, 22, 25, 29, 30, 32], "int8": 15, "quantiz": [15, 22, 36], "tutori": [16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 36, 37, 38], "learn": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "singl": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "1best": [16, 19, 21, 25, 26, 27, 29, 30], "handl": [16, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "automag": [16, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "stop": [16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "control": [16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "By": [16, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "musan": [16, 19, 21, 22, 24, 26, 27, 36, 37, 38], "thei": [16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "re": [16, 19, 21, 22, 24, 26, 27, 36, 37, 38], "intal": [16, 19], "initi": [16, 19], "sudo": [16, 19], "apt": [16, 19], "permiss": [16, 19], "commandlin": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "quit": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "often": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "experi": [16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37, 38], "state": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "world": [16, 18, 19, 21, 22, 24, 25, 26, 27, 36, 37, 38], "multi": [16, 18, 19, 21, 22, 24, 26, 27, 34, 36, 37, 38], "machin": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "ddp": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "implement": [16, 18, 19, 21, 22, 24, 26, 27, 34, 36, 37, 38], "present": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "second": [16, 18, 19, 21, 22, 24, 26, 27, 32, 36, 37, 38], "over": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "utter": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "pad": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "oom": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "v100": [16, 18, 19, 21], "nvidia": [16, 18, 19, 21], "due": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "usual": [16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "larger": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "caus": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "increas": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "weight": [16, 19, 21, 26, 27, 36], "decai": [16, 19, 21, 26, 27, 36], "warmup": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "get_param": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "realli": [16, 19, 21, 24, 26, 27, 36, 37, 38], "directli": [16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "perturb": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "speed": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "factor": [16, 18, 19, 21, 22, 24, 26, 27, 36, 37, 38], "3x150": [16, 18, 19], "hour": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "These": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "rate": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "visual": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "logdir": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "labelsmooth": 16, "someth": [16, 18, 19, 21, 24, 26, 27, 32, 36, 37], "tensorflow": [16, 18, 19, 21, 24, 26, 27, 32, 36, 37], "press": [16, 18, 19, 21, 24, 26, 27, 32, 36, 37, 38], "ctrl": [16, 18, 19, 21, 24, 26, 27, 32, 36, 37, 38], "engw8ksktzqs24zbv5dgcg": 16, "22t11": 16, "scan": [16, 18, 19, 21, 24, 32, 36, 37], "116068": 16, "scalar": [16, 18, 19, 21, 24, 32, 36, 37], "listen": [16, 18, 19, 24, 32, 36, 37], "url": [16, 18, 19, 21, 24, 26, 27, 32, 36, 37], "xxxx": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "saw": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "consol": [16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "typic": [16, 18, 19, 21], "avoid": [16, 18, 21], "commonli": [16, 18, 19, 21, 25, 29, 30, 32], "nbest": [16, 21, 27], "scale": [16, 21, 22, 25, 27, 29, 30], "lattic": [16, 19, 21, 24, 25, 29, 30, 37, 38], "score": [16, 21, 24, 37, 38], "uniqu": [16, 21, 24, 37, 38], "pkufool": [16, 19, 25], "icefall_asr_aishell_conformer_ctc": 16, "transcrib": [16, 18, 19, 21], "lang_char": [16, 18], "word": [16, 18, 19, 21, 25, 29, 30, 32], "bac009s0764w0121": [16, 18, 19], "bac009s0764w0122": [16, 18, 19], "bac009s0764w0123": [16, 18, 19], "tran": [16, 19, 21, 25, 29, 30], "graph": [16, 19, 21, 24, 25, 29, 30, 37, 38], "id": [16, 19, 21, 25, 29, 30], "conveni": [16, 19, 21, 22], "eo": [16, 19, 21], "easili": [16, 19, 21], "obtain": [16, 18, 19, 21, 25, 29, 30], "84": 16, "soxi": [16, 18, 19, 21, 25, 32], "sampl": [16, 18, 19, 21, 25, 26, 32, 37, 38], "precis": [16, 18, 19, 21, 24, 25, 32, 37, 38], "bit": [16, 18, 19, 21, 25, 32], "67263": [16, 18, 19], "cdda": [16, 18, 19, 21, 25, 32], "sector": [16, 18, 19, 21, 25, 32], "135k": [16, 18, 19], "256k": [16, 18, 19, 21], "sign": [16, 18, 19, 21, 32], "integ": [16, 18, 19, 21, 32], "pcm": [16, 18, 19, 21, 32], "65840": [16, 18, 19], "625": [16, 18, 19], "132k": [16, 18, 19], "64000": [16, 18, 19], "300": [16, 18, 19, 21, 22, 24, 37], "128k": [16, 18, 19, 32], "displai": [16, 18, 19, 21], "topologi": [16, 21], "53": [16, 24, 25, 30, 36, 37], "707": [16, 21], "attention_dim": [16, 18, 21], "num_decoder_lay": [16, 21], "vgg_frontend": [16, 18, 21], "use_feat_batchnorm": [16, 21], "f2fd997f752ed11bbef4c306652c433e83f9cf12": 16, "sun": 16, "sep": 16, "46": [16, 21], "33cfe45": 16, "d57a873": 16, "nov": [16, 21], "hw": 16, "kangwei": 16, "icefall_aishell3": 16, "k2_releas": 16, "tokens_fil": 16, "words_fil": [16, 21, 32], "num_path": [16, 21, 24, 37, 38], "ngram_lm_scal": [16, 21], "attention_decoder_scal": [16, 21], "nbest_scal": [16, 21], "sos_id": [16, 21], "eos_id": [16, 21], "num_class": [16, 21, 32], "4336": [16, 18], "242": [16, 21], "131": [16, 21], "134": 16, "269": [16, 29, 30], "275": 16, "241": 16, "293": [16, 21], "704": [16, 29], "369": [16, 21], "\u751a": [16, 18], "\u81f3": [16, 18], "\u51fa": [16, 18], "\u73b0": [16, 18], "\u4ea4": [16, 18], "\u6613": [16, 18], "\u51e0": [16, 18], "\u4e4e": [16, 18], "\u505c": [16, 18], "\u6b62": 16, "\u7684": [16, 18, 19], "\u60c5": [16, 18], "\u51b5": [16, 18], "\u4e00": [16, 18], "\u4e8c": [16, 18], "\u7ebf": [16, 18, 19], "\u57ce": [16, 18], "\u5e02": [16, 18], "\u867d": [16, 18], "\u7136": [16, 18], "\u4e5f": [16, 18, 19], "\u5904": [16, 18], "\u4e8e": [16, 18], "\u8c03": [16, 18], "\u6574": [16, 18], "\u4e2d": [16, 18, 19], "\u4f46": [16, 18, 19], "\u56e0": [16, 18], "\u4e3a": [16, 18], "\u805a": [16, 18], "\u96c6": [16, 18], "\u4e86": [16, 18, 19], "\u8fc7": [16, 18], "\u591a": [16, 18], "\u516c": [16, 18], "\u5171": [16, 18], "\u8d44": [16, 18], "\u6e90": [16, 18], "371": 16, "37": [16, 18, 21, 29], "683": 16, "47": [16, 21], "651": [16, 32], "654": 16, "659": 16, "752": 16, "321": 16, "887": 16, "340": 16, "370": 16, "\u751a\u81f3": [16, 19], "\u51fa\u73b0": [16, 19], "\u4ea4\u6613": [16, 19], "\u51e0\u4e4e": [16, 19], "\u505c\u6b62": 16, "\u60c5\u51b5": [16, 19], "\u4e00\u4e8c": [16, 19], "\u57ce\u5e02": [16, 19], "\u867d\u7136": [16, 19], "\u5904\u4e8e": [16, 19], "\u8c03\u6574": [16, 19], "\u56e0\u4e3a": [16, 19], "\u805a\u96c6": [16, 19], "\u8fc7\u591a": [16, 19], "\u516c\u5171": [16, 19], "\u8d44\u6e90": [16, 19], "372": 16, "recor": [16, 21], "highest": [16, 21], "965": 16, "966": 16, "821": 16, "822": 16, "826": 16, "916": 16, "115": [16, 21], "345": 16, "888": 16, "889": 16, "limit": [16, 18, 21, 34, 37], "upgrad": [16, 21], "pro": [16, 21], "deploi": [16, 21], "At": [16, 21], "checkout": [16, 21], "v2": [16, 21], "j": [16, 21], "hlg_decod": [16, 21], "four": [16, 21], "messag": [16, 21, 24, 26, 27, 36, 37, 38], "nn_model": [16, 21], "use_gpu": [16, 21], "word_tabl": [16, 21], "caution": [16, 21], "forward": [16, 21, 26], "cu": [16, 21], "int": [16, 21], "char": [16, 21], "124": [16, 21], "98": 16, "150": [16, 21], "693": [16, 29], "165": [16, 21], "nnet_output": [16, 21], "182": [16, 25], "180": [16, 21], "489": 16, "45": [16, 18, 21], "mandarin": 17, "corpu": 17, "beij": 17, "shell": 17, "technologi": 17, "ltd": 17, "400": 17, "peopl": 17, "accent": 17, "area": 17, "china": 17, "invit": 17, "particip": 17, "conduct": 17, "quiet": 17, "indoor": 17, "high": 17, "fidel": 17, "microphon": 17, "downsampl": 17, "16khz": 17, "manual": 17, "accuraci": 17, "through": 17, "profession": 17, "annot": 17, "strict": 17, "inspect": 17, "free": [17, 22, 36], "academ": 17, "moder": 17, "amount": 17, "research": 17, "field": 17, "openslr": 17, "ctc": [17, 20, 23, 27, 28, 31], "stateless": [17, 20, 24, 36, 37, 38], "instead": [18, 37], "rnn": [18, 24, 26, 36, 37, 38], "head": [18, 34], "feedforward": [18, 24, 37], "embed": [18, 24, 36, 37, 38], "conv1d": [18, 24, 36, 37, 38], "nn": [18, 24, 26, 27, 36, 37, 38], "tanh": 18, "linear": 18, "borrow": 18, "ieeexplor": 18, "ieee": 18, "stamp": 18, "jsp": 18, "arnumb": 18, "9054419": 18, "predict": [18, 22, 24, 36, 37, 38], "charact": 18, "unit": 18, "vocabulari": 18, "87939824": 18, "88": 18, "optimized_transduc": 18, "technqiu": 18, "propos": [18, 34, 38], "improv": 18, "end": [18, 24, 26, 27, 32, 36, 37, 38], "furthermor": 18, "maximum": 18, "emit": 18, "per": [18, 24, 37, 38], "frame": [18, 24, 26, 37, 38], "simplifi": [18, 34], "significantli": 18, "degrad": 18, "exactli": 18, "benchmark": 18, "unprun": 18, "advantag": 18, "minim": 18, "pruned_transducer_stateless": [18, 24, 34, 37], "altern": 18, "though": 18, "transducer_stateless_modifi": 18, "pr": 18, "gb": 18, "ram": 18, "small": [18, 29, 30, 32], "tri": 18, "prob": [18, 36], "appli": [18, 34], "configur": [18, 22, 25, 29, 30, 32], "c": [18, 19, 24, 26, 27, 32, 36, 37, 38], "lagz6hrcqxoigbfd5e0y3q": 18, "03t14": 18, "8477": 18, "sym": [18, 24, 37, 38], "beam_search": [18, 24, 37, 38], "decoding_method": 18, "beam_4": 18, "28": [18, 21, 25], "ensur": 18, "give": 18, "poor": 18, "531": [18, 19], "994": [18, 21], "027": 18, "encoder_out_dim": 18, "f4fefe4882bc0ae59af951da3f47335d5495ef71": 18, "feb": 18, "50d2281": 18, "mar": 18, "0815224919": 18, "75d558775b": 18, "mmnv8": 18, "72": [18, 21], "878": [18, 30], "257": [18, 29, 30], "880": 18, "267": [18, 29, 30], "891": 18, "__floordiv__": 18, "x_len": 18, "163": [18, 21], "\u6ede": 18, "322": 18, "759": 18, "760": 18, "919": 18, "922": 18, "046": 18, "047": 18, "319": [18, 21], "214": [18, 21], "215": [18, 21, 25], "402": 18, "topk_hyp_index": 18, "topk_index": 18, "logit": 18, "583": [18, 30], "2000": 19, "lji9mwuorlow3jkdhxwk8a": 19, "13t11": 19, "4454": 19, "icefall_asr_aishell_tdnn_lstm_ctc": 19, "858": [19, 21], "389": [19, 21], "154": 19, "161": [19, 21], "536": 19, "171": [19, 21, 29, 30], "539": 19, "917": 19, "207": [19, 21], "129": 19, "\u505c\u6ede": 19, "222": [19, 21], "statelessx": [20, 23, 33, 34, 35], "zipform": [20, 23, 33, 35], "mmi": [20, 23], "blank": [20, 23], "skip": [20, 22, 23, 24, 36, 37, 38], "distil": [20, 23], "hubert": [20, 23], "ligru": [20, 28], "full": [21, 22, 24, 26, 27, 36, 37, 38], "libri": [21, 22, 24, 26, 27, 36, 37, 38], "960": [21, 24, 26, 27, 36, 37, 38], "subset": [21, 24, 26, 27, 36, 37, 38], "3x960": [21, 24, 26, 27, 36, 37, 38], "2880": [21, 24, 26, 27, 36, 37, 38], "lzgnetjwrxc3yghnmd4kpw": 21, "24t16": 21, "43": 21, "4540": 21, "sentenc": 21, "piec": 21, "And": [21, 24, 26, 27, 36, 37, 38], "neither": 21, "nor": 21, "vocab": 21, "5000": 21, "44": [21, 29, 30], "033": 21, "538": 21, "full_libri": [21, 22], "406": 21, "464": 21, "548": 21, "776": 21, "652": [21, 32], "109226120": 21, "714": [21, 29], "473": 21, "944": 21, "1328": 21, "54": [21, 25, 29, 30], "443": [21, 25], "2563": 21, "56": [21, 29], "494": 21, "592": 21, "1715": 21, "52576": 21, "1424": 21, "807": 21, "506": 21, "808": [21, 29], "522": 21, "362": 21, "565": 21, "1477": 21, "106": 21, "2922": 21, "208": 21, "4295": 21, "52343": 21, "396": 21, "3584": 21, "433": 21, "680": [21, 29], "_pickl": 21, "unpicklingerror": 21, "hlg_modifi": 21, "g_4_gram": [21, 25, 29, 30], "496": [21, 25], "875": [21, 25], "212k": 21, "267440": [21, 25], "1253": [21, 25], "535k": 21, "83": [21, 25], "77200": [21, 25], "361": [21, 25], "154k": 21, "554": 21, "260": 21, "7178d67e594bc7fa89c2b331ad7bd1c62a6a9eb4": 21, "8d93169": 21, "266": [21, 25], "268": [21, 25], "601": 21, "758": 21, "025": 21, "204": 21, "425": 21, "broffel": 21, "osom": 21, "427": 21, "723": 21, "775": 21, "881": 21, "352": 21, "234": 21, "384": 21, "whole": [21, 25, 29, 30, 37, 38], "ngram": [21, 25, 29, 30], "857": 21, "979": 21, "980": 21, "055": 21, "117": 21, "051": 21, "363": 21, "959": [21, 30], "546": 21, "598": 21, "599": [21, 25], "833": 21, "834": 21, "915": 21, "076": 21, "110": 21, "397": 21, "999": [21, 24, 37, 38], "concaten": 21, "bucket": 21, "sampler": 21, "1000": 21, "ctc_decod": 21, "ngram_lm_rescor": 21, "attention_rescor": 21, "kind": [21, 24, 26, 27, 36, 37, 38], "316": 21, "118": 21, "221": 21, "125": [21, 32], "136": 21, "144": 21, "159": [21, 32], "543": 21, "174": 21, "topo": 21, "547": 21, "729": 21, "111": 21, "702": 21, "703": 21, "545": 21, "122": 21, "280": 21, "135": [21, 32], "153": [21, 32], "945": 21, "475": 21, "191": [21, 29, 30], "398": 21, "199": [21, 25], "515": 21, "205": 21, "w": [21, 29, 30], "deseri": 21, "441": 21, "fsaclass": 21, "loadfsa": 21, "const": 21, "string": 21, "c10": 21, "ignor": 21, "dummi": 21, "589": 21, "attention_scal": 21, "656": 21, "162": 21, "169": [21, 29, 30], "188": 21, "624": 21, "519": [21, 30], "632": 21, "645": [21, 32], "243": 21, "970": 21, "303": 21, "179": 21, "totori": 22, "knowledg": 22, "vector": 22, "mvq": 22, "kd": 22, "paper": [22, 24, 36, 37, 38], "pruned_transducer_stateless4": [22, 24, 34, 37], "theoret": 22, "applic": 22, "minor": 22, "out": 22, "necessari": 22, "pruned_transducer_statelessx": 22, "thing": 22, "distillation_with_hubert": 22, "Of": 22, "cours": 22, "xl": 22, "know": 22, "proce": 22, "960h": 22, "use_extracted_codebook": 22, "augment": 22, "th": [22, 29, 30], "fine": 22, "embedding_lay": 22, "num_codebook": 22, "under": 22, "direcori": 22, "vq_fbank_layer36_cb8": 22, "whola": 22, "snippet": 22, "echo": 22, "awk": 22, "split": 22, "_": 22, "pruned_transducer_stateless6": 22, "12359": 22, "spec": 22, "aug": 22, "warp": 22, "enabl": 22, "argument": [22, 34], "paid": 22, "similar": [22, 26, 37, 38], "suitabl": [24, 36, 37, 38], "pruned_transducer_stateless2": [24, 34, 37], "pruned_transducer_stateless5": [24, 34, 37], "scroll": [24, 26, 27, 36, 37, 38], "scratch": [24, 26, 27, 36, 37, 38], "arxiv": [24, 36, 37, 38], "ab": [24, 36, 37, 38], "2206": [24, 36, 37, 38], "13236": [24, 36, 37, 38], "rework": [24, 34, 37], "daniel": [24, 37, 38], "joint": [24, 36, 37, 38], "contrari": [24, 36, 37, 38], "convent": [24, 36, 37, 38], "recurr": [24, 36, 37, 38], "2x": [24, 37, 38], "dimens": [24, 37, 38], "littl": [24, 37], "allow": [24, 37], "436000": [24, 26, 27, 36, 37, 38], "438000": [24, 26, 27, 36, 37, 38], "qogspbgsr8kzcrmmie9jgw": 24, "20t15": [24, 36, 37], "4468": [24, 36, 37], "210171": [24, 36, 37], "access": [24, 26, 27, 36, 37, 38], "6008": [24, 26, 27, 36, 37, 38], "localhost": [24, 26, 27, 36, 37, 38], "expos": [24, 26, 27, 36, 37, 38], "proxi": [24, 26, 27, 36, 37, 38], "bind_al": [24, 26, 27, 36, 37, 38], "suggest": [24, 26, 27, 36, 37, 38], "lowest": [24, 26, 27, 36, 37, 38], "fast_beam_search": [24, 26, 36, 37, 38], "474000": [24, 36, 37, 38], "largest": [24, 37, 38], "posterior": [24, 26, 37, 38], "algorithm": [24, 37, 38], "pdf": [24, 27, 37, 38], "1211": [24, 37, 38], "3711": [24, 37, 38], "espnet": [24, 37, 38], "net": [24, 37, 38], "beam_search_transduc": [24, 37, 38], "basicli": [24, 37, 38], "topk": [24, 37, 38], "expand": [24, 37, 38], "mode": [24, 37, 38], "being": [24, 37, 38], "hardcod": [24, 37, 38], "composit": [24, 37, 38], "between": [24, 37, 38], "log_prob": [24, 37, 38], "hard": [24, 34, 37, 38], "2211": [24, 37, 38], "00484": [24, 37, 38], "rnnt": [24, 37, 38], "effici": [24, 37, 38], "fast_beam_search_lg": [24, 37, 38], "trivial": [24, 37, 38], "fast_beam_search_nbest": [24, 37, 38], "random_path": [24, 37, 38], "shortest": [24, 37, 38], "fast_beam_search_nbest_lg": [24, 37, 38], "logic": [24, 37, 38], "smallest": [24, 36, 37, 38], "icefall_asr_librispeech_tdnn": 25, "lstm_ctc": 25, "flac": 25, "116k": 25, "140k": 25, "343k": 25, "164k": 25, "105k": 25, "174k": 25, "pretraind": 25, "168": 25, "170": 25, "584": [25, 30], "209": 25, "791": 25, "245": 25, "099": 25, "methond": [25, 29, 30], "725": 25, "403": 25, "631": 25, "010": 25, "guidanc": 26, "calcul": [26, 37, 38], "bigger": 26, "threshold": 26, "simpli": 26, "discard": 26, "prevent": 26, "convolut": [26, 34, 37], "lconv": 26, "encourag": [26, 27, 36], "stabil": [26, 27], "doesn": 26, "warm": [26, 27], "pruned_transducer_stateless7_ctc_b": 26, "xyozukpeqm62hbilud4upa": [26, 27], "ctc_guild_decode_b": 26, "pretrained_ctc": 26, "jit_pretrained_ctc": 26, "yfyeung": 26, "wechat": 27, "zipformer_mmi": 27, "worker": [27, 36], "hp": 27, "tdnn_ligru_ctc": 29, "enough": [29, 30, 32], "luomingshuang": [29, 30], "icefall_asr_timit_tdnn_ligru_ctc": 29, "pretrained_average_9_25": 29, "fdhc0_si1559": [29, 30], "felc0_si756": [29, 30], "fmgd0_si1564": [29, 30], "ffprobe": [29, 30], "show_format": [29, 30], "nistspher": [29, 30], "database_id": [29, 30], "database_vers": [29, 30], "utterance_id": [29, 30], "dhc0_si1559": [29, 30], "sample_min": [29, 30], "4176": [29, 30], "sample_max": [29, 30], "5984": [29, 30], "bitrat": [29, 30], "258": [29, 30], "audio": [29, 30], "pcm_s16le": [29, 30], "hz": [29, 30], "s16": [29, 30], "256": [29, 30], "elc0_si756": [29, 30], "1546": [29, 30], "1989": [29, 30], "mgd0_si1564": [29, 30], "7626": [29, 30], "10573": [29, 30], "660": 29, "183": [29, 30], "695": 29, "697": 29, "210": [29, 30], "829": 29, "sil": [29, 30], "dh": [29, 30], "ih": [29, 30], "uw": [29, 30], "ah": [29, 30], "ii": [29, 30], "z": [29, 30], "aa": [29, 30], "ei": [29, 30], "dx": [29, 30], "uh": [29, 30], "ng": [29, 30], "eh": [29, 30], "jh": [29, 30], "er": [29, 30], "ai": [29, 30], "hh": [29, 30], "aw": 29, "ae": [29, 30], "705": 29, "715": 29, "720": 29, "251": [29, 30], "348": 29, "ch": 29, "icefall_asr_timit_tdnn_lstm_ctc": 30, "pretrained_average_16_25": 30, "816": 30, "827": 30, "387": 30, "unk": 30, "739": 30, "971": 30, "977": 30, "978": 30, "981": 30, "ow": 30, "ykubhb5wrmosxykid1z9eg": 32, "23t23": 32, "icefall_asr_yesno_tdnn": 32, "l_disambig": 32, "lexicon_disambig": 32, "arpa": 32, "0_0_0_1_0_0_0_1": 32, "0_0_1_0_0_0_1_0": 32, "0_0_1_0_0_1_1_1": 32, "0_0_1_0_1_0_0_1": 32, "0_0_1_1_0_0_0_1": 32, "0_0_1_1_0_1_1_0": 32, "0_0_1_1_1_0_0_0": 32, "0_0_1_1_1_1_0_0": 32, "0_1_0_0_0_1_0_0": 32, "0_1_0_0_1_0_1_0": 32, "0_1_0_1_0_0_0_0": 32, "0_1_0_1_1_1_0_0": 32, "0_1_1_0_0_1_1_1": 32, "0_1_1_1_0_0_1_0": 32, "0_1_1_1_1_0_1_0": 32, "1_0_0_0_0_0_0_0": 32, "1_0_0_0_0_0_1_1": 32, "1_0_0_1_0_1_1_1": 32, "1_0_1_1_0_1_1_1": 32, "1_0_1_1_1_1_0_1": 32, "1_1_0_0_0_1_1_1": 32, "1_1_0_0_1_0_1_1": 32, "1_1_0_1_0_1_0_0": 32, "1_1_0_1_1_0_0_1": 32, "1_1_0_1_1_1_1_0": 32, "1_1_1_0_0_1_0_1": 32, "1_1_1_0_1_0_1_0": 32, "1_1_1_1_0_0_1_0": 32, "1_1_1_1_1_0_0_0": 32, "1_1_1_1_1_1_1_1": 32, "54080": 32, "507": 32, "108k": 32, "ye": 32, "hebrew": 32, "NO": 32, "621": 32, "119": 32, "650": 32, "139": 32, "143": 32, "198": 32, "181": 32, "186": 32, "187": 32, "287": 32, "correctli": 32, "simplest": 32, "former": 34, "idea": 34, "achiev": 34, "mask": [34, 37, 38], "wenet": 34, "did": 34, "adapt": 34, "complic": 34, "techniqu": 34, "bank": 34, "memor": 34, "histori": 34, "introduc": 34, "variant": 34, "pruned_stateless_emformer_rnnt2": 34, "conv_emformer_transducer_stateless": 34, "ourself": 34, "mechan": 34, "onlin": 36, "lstm_transducer_stateless": 36, "lower": 36, "prepare_giga_speech": 36, "cj2vtpiwqhkn9q1tx6ptpg": 36, "hidden": 36, "1024": 36, "abl": 36, "third": 36, "extern": 36, "dynam": [37, 38], "causal": 37, "short": [37, 38], "2012": 37, "05481": 37, "flag": 37, "indic": [37, 38], "whether": 37, "sequenc": [37, 38], "uniformli": [37, 38], "seen": [37, 38], "97vkxf80ru61cnp2alwzzg": 37, "streaming_decod": [37, 38], "acoust": [37, 38], "wise": [37, 38], "equal": [37, 38], "where": 37, "parallel": [37, 38], "bath": [37, 38], "parallelli": [37, 38], "seem": 37, "benefit": 37, "might": [37, 38], "mismatch": 37, "mdoel": 37, "pruned_transducer_stateless7_stream": 38, "len": 38, "320m": 38, "550": 38, "scriptmodul": 38, "jit_trace_export": 38, "jit_trace_pretrain": 38, "task": 39}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"follow": 0, "code": 0, "style": 0, "contribut": [1, 3], "document": 1, "how": [2, 10, 12, 13, 14], "creat": [2, 9], "recip": [2, 39], "data": [2, 9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "prepar": [2, 9, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "train": [2, 6, 9, 11, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "decod": [2, 9, 10, 16, 18, 19, 21, 22, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "pre": [2, 6, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "model": [2, 6, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "frequent": 4, "ask": 4, "question": 4, "faq": 4, "oserror": 4, "libtorch_hip": 4, "so": 4, "cannot": 4, "open": 4, "share": 4, "object": 4, "file": 4, "directori": 4, "attributeerror": 4, "modul": 4, "distutil": 4, "ha": 4, "attribut": 4, "version": 4, "huggingfac": [5, 7], "space": 7, "youtub": [7, 9], "video": [7, 9], "icefal": [8, 9, 11], "content": [8, 39], "instal": [9, 11, 16, 18, 19, 21, 25, 29, 30], "0": 9, "pytorch": 9, "torchaudio": 9, "1": [9, 11, 16, 18, 19, 21], "k2": 9, "2": [9, 11, 16, 18, 19, 21], "lhots": 9, "3": [9, 11, 16, 18, 21], "download": [9, 11, 16, 18, 19, 21, 24, 25, 26, 27, 29, 30, 32, 36, 37, 38], "exampl": [9, 16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "virtual": 9, "environ": 9, "activ": 9, "your": 9, "4": [9, 11], "5": [9, 11], "test": [9, 11], "export": [10, 11, 12, 13, 14, 15, 24, 26, 27, 36, 37, 38], "state_dict": [10, 24, 26, 27, 36, 37, 38], "when": [10, 12, 13, 14], "us": [10, 12, 13, 14, 24, 26, 27, 36, 37, 38], "run": 10, "py": 10, "ncnn": [11, 36], "lstm": [11, 19, 25, 30, 36], "transduc": [11, 18, 24, 36, 37, 38], "convemform": [11, 36], "pnnx": 11, "via": 11, "torch": [11, 13, 14, 24, 26, 27, 36, 37, 38], "jit": [11, 13, 14, 24, 26, 27, 36, 37, 38], "trace": [11, 14, 36, 38], "torchscript": 11, "modifi": [11, 18], "encod": 11, "sherpa": [11, 24, 37, 38], "6": 11, "option": [11, 16, 19, 21, 24, 26, 27, 36, 37, 38], "int8": 11, "quantiz": 11, "onnx": 12, "script": [13, 24, 26, 27, 37, 38], "conform": [16, 21, 34], "ctc": [16, 19, 21, 25, 26, 29, 30, 32], "configur": [16, 19, 21, 24, 26, 27, 36, 37, 38], "log": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "usag": [16, 18, 19, 21, 24, 26, 27, 36, 37, 38], "case": [16, 18, 19, 21], "kaldifeat": [16, 18, 19, 21, 25, 29, 30, 32], "hlg": [16, 19, 21], "attent": [16, 21], "rescor": [16, 21], "colab": [16, 18, 19, 21, 25, 29, 30, 32], "notebook": [16, 18, 19, 21, 25, 29, 30, 32], "deploy": [16, 21], "c": [16, 21], "aishel": 17, "stateless": 18, "The": 18, "loss": 18, "todo": 18, "greedi": 18, "search": 18, "beam": 18, "tdnn": [19, 25, 29, 30, 32], "non": 20, "stream": [20, 33, 34, 37, 38], "asr": [20, 33], "lm": 21, "comput": 21, "wer": 21, "n": 21, "gram": 21, "distil": 22, "hubert": 22, "codebook": 22, "index": 22, "librispeech": [23, 35], "prune": [24, 37], "statelessx": [24, 37], "pretrain": [24, 26, 27, 36, 37, 38], "deploi": [24, 37, 38], "infer": [25, 29, 30, 32], "zipform": [26, 27, 38], "blank": 26, "skip": 26, "mmi": 27, "timit": 28, "ligru": 29, "yesno": 31, "introduct": 34, "emform": 34, "which": 36, "simul": [37, 38], "real": [37, 38], "tabl": 39}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx": 57}, "alltitles": {"Follow the code style": [[0, "follow-the-code-style"]], "Contributing to Documentation": [[1, "contributing-to-documentation"]], "How to create a recipe": [[2, "how-to-create-a-recipe"]], "Data Preparation": [[2, "data-preparation"], [18, "data-preparation"]], "Training": [[2, "training"], [9, "training"], [16, "training"], [18, "training"], [19, "training"], [21, "training"], [22, "training"], [24, "training"], [25, "training"], [26, "training"], [27, "training"], [29, "training"], [30, "training"], [32, "training"], [36, "training"], [37, "training"], [38, "training"]], "Decoding": [[2, "decoding"], [9, "decoding"], [16, "decoding"], [18, "decoding"], [19, "decoding"], [21, "decoding"], [22, "decoding"], [24, "decoding"], [25, "decoding"], [26, "decoding"], [27, "decoding"], [29, "decoding"], [30, "decoding"], [32, "decoding"], [36, "decoding"], [37, "decoding"], [38, "decoding"]], "Pre-trained model": [[2, "pre-trained-model"]], "Contributing": [[3, "contributing"]], "Frequently Asked Questions (FAQs)": [[4, "frequently-asked-questions-faqs"]], "OSError: libtorch_hip.so: cannot open shared object file: no such file or directory": [[4, "oserror-libtorch-hip-so-cannot-open-shared-object-file-no-such-file-or-directory"]], "AttributeError: module \u2018distutils\u2019 has no attribute \u2018version\u2019": [[4, "attributeerror-module-distutils-has-no-attribute-version"]], "Huggingface": [[5, "huggingface"]], "Pre-trained models": [[6, "pre-trained-models"]], "Huggingface spaces": [[7, "huggingface-spaces"]], "YouTube Video": [[7, "youtube-video"], [9, "youtube-video"]], "Icefall": [[8, "icefall"]], "Contents:": [[8, null]], "Installation": [[9, "installation"]], "(0) Install PyTorch and torchaudio": [[9, "install-pytorch-and-torchaudio"]], "(1) Install k2": [[9, "install-k2"]], "(2) Install lhotse": [[9, "install-lhotse"]], "(3) Download icefall": [[9, "download-icefall"]], "Installation example": [[9, "installation-example"]], "(1) Create a virtual environment": [[9, "create-a-virtual-environment"]], "(2) Activate your virtual environment": [[9, "activate-your-virtual-environment"]], "(3) Install k2": [[9, "id1"]], "(4) Install lhotse": [[9, "id2"]], "(5) Download icefall": [[9, "id3"]], "Test Your Installation": [[9, "test-your-installation"]], "Data preparation": [[9, "data-preparation"], [16, "data-preparation"], [19, "data-preparation"], [21, "data-preparation"], [22, "data-preparation"], [24, "data-preparation"], [25, "data-preparation"], [26, "data-preparation"], [27, "data-preparation"], [29, "data-preparation"], [30, "data-preparation"], [32, "data-preparation"], [36, "data-preparation"], [37, "data-preparation"], [38, "data-preparation"]], "Export model.state_dict()": [[10, "export-model-state-dict"], [24, "export-model-state-dict"], [26, "export-model-state-dict"], [27, "export-model-state-dict"], [36, "export-model-state-dict"], [37, "export-model-state-dict"], [38, "export-model-state-dict"]], "When to use it": [[10, "when-to-use-it"], [12, "when-to-use-it"], [13, "when-to-use-it"], [14, "when-to-use-it"]], "How to export": [[10, "how-to-export"], [12, "how-to-export"], [13, "how-to-export"], [14, "how-to-export"]], "How to use the exported model": [[10, "how-to-use-the-exported-model"], [12, "how-to-use-the-exported-model"], [13, "how-to-use-the-exported-model"]], "Use the exported model to run decode.py": [[10, "use-the-exported-model-to-run-decode-py"]], "Export to ncnn": [[11, "export-to-ncnn"]], "Export LSTM transducer models": [[11, "export-lstm-transducer-models"]], "Export ConvEmformer transducer models": [[11, "export-convemformer-transducer-models"]], "1. Download the pre-trained model": [[11, "download-the-pre-trained-model"]], "2. Install ncnn and pnnx": [[11, "install-ncnn-and-pnnx"]], "3. Export the model via torch.jit.trace()": [[11, "export-the-model-via-torch-jit-trace"]], "3. Export torchscript model via pnnx": [[11, "export-torchscript-model-via-pnnx"]], "4. Test the exported models in icefall": [[11, "test-the-exported-models-in-icefall"]], "5. Modify the exported encoder for sherpa-ncnn": [[11, "modify-the-exported-encoder-for-sherpa-ncnn"]], "6. (Optional) int8 quantization with sherpa-ncnn": [[11, "optional-int8-quantization-with-sherpa-ncnn"]], "Export to ONNX": [[12, "export-to-onnx"]], "Export model with torch.jit.script()": [[13, "export-model-with-torch-jit-script"]], "Export model with torch.jit.trace()": [[14, "export-model-with-torch-jit-trace"]], "How to use the exported models": [[14, "how-to-use-the-exported-models"]], "Model export": [[15, "model-export"]], "Conformer CTC": [[16, "conformer-ctc"], [21, "conformer-ctc"]], "Configurable options": [[16, "configurable-options"], [19, "configurable-options"], [21, "configurable-options"], [24, "configurable-options"], [26, "configurable-options"], [27, "configurable-options"], [36, "configurable-options"], [37, "configurable-options"], [38, "configurable-options"]], "Pre-configured options": [[16, "pre-configured-options"], [19, "pre-configured-options"], [21, "pre-configured-options"], [24, "pre-configured-options"], [26, "pre-configured-options"], [27, "pre-configured-options"], [36, "pre-configured-options"], [37, "pre-configured-options"], [38, "pre-configured-options"]], "Training logs": [[16, "training-logs"], [18, "training-logs"], [19, "training-logs"], [21, "training-logs"], [24, "training-logs"], [26, "training-logs"], [27, "training-logs"], [36, "training-logs"], [37, "training-logs"], [38, "training-logs"]], "Usage examples": [[16, "usage-examples"], [18, "usage-examples"], [19, "usage-examples"], [21, "usage-examples"]], "Case 1": [[16, "case-1"], [18, "case-1"], [19, "case-1"], [21, "case-1"]], "Case 2": [[16, "case-2"], [18, "case-2"], [19, "case-2"], [21, "case-2"]], "Case 3": [[16, "case-3"], [18, "case-3"], [21, "case-3"]], "Pre-trained Model": [[16, "pre-trained-model"], [18, "pre-trained-model"], [19, "pre-trained-model"], [21, "pre-trained-model"], [25, "pre-trained-model"], [29, "pre-trained-model"], [30, "pre-trained-model"], [32, "pre-trained-model"]], "Install kaldifeat": [[16, "install-kaldifeat"], [18, "install-kaldifeat"], [19, "install-kaldifeat"], [21, "install-kaldifeat"], [25, "install-kaldifeat"], [29, "install-kaldifeat"], [30, "install-kaldifeat"]], "Download the pre-trained model": [[16, "download-the-pre-trained-model"], [18, "download-the-pre-trained-model"], [19, "download-the-pre-trained-model"], [21, "download-the-pre-trained-model"], [25, "download-the-pre-trained-model"], [29, "download-the-pre-trained-model"], [30, "download-the-pre-trained-model"], [32, "download-the-pre-trained-model"]], "Usage": [[16, "usage"], [18, "usage"], [19, "usage"], [21, "usage"]], "CTC decoding": [[16, "ctc-decoding"], [21, "ctc-decoding"], [21, "id2"]], "HLG decoding": [[16, "hlg-decoding"], [16, "id2"], [19, "hlg-decoding"], [21, "hlg-decoding"], [21, "id3"]], "HLG decoding + attention decoder rescoring": [[16, "hlg-decoding-attention-decoder-rescoring"]], "Colab notebook": [[16, "colab-notebook"], [18, "colab-notebook"], [19, "colab-notebook"], [21, "colab-notebook"], [25, "colab-notebook"], [29, "colab-notebook"], [30, "colab-notebook"], [32, "colab-notebook"]], "Deployment with C++": [[16, "deployment-with-c"], [21, "deployment-with-c"]], "aishell": [[17, "aishell"]], "Stateless Transducer": [[18, "stateless-transducer"]], "The Model": [[18, "the-model"]], "The Loss": [[18, "the-loss"]], "Todo": [[18, "id1"]], "Greedy search": [[18, "greedy-search"]], "Beam search": [[18, "beam-search"]], "Modified Beam search": [[18, "modified-beam-search"]], "TDNN-LSTM CTC": [[19, "tdnn-lstm-ctc"]], "Non Streaming ASR": [[20, "non-streaming-asr"]], "HLG decoding + LM rescoring": [[21, "hlg-decoding-lm-rescoring"]], "HLG decoding + LM rescoring + attention decoder rescoring": [[21, "hlg-decoding-lm-rescoring-attention-decoder-rescoring"]], "Compute WER with the pre-trained model": [[21, "compute-wer-with-the-pre-trained-model"]], "HLG decoding + n-gram LM rescoring": [[21, "hlg-decoding-n-gram-lm-rescoring"]], "HLG decoding + n-gram LM rescoring + attention decoder rescoring": [[21, "hlg-decoding-n-gram-lm-rescoring-attention-decoder-rescoring"]], "Distillation with HuBERT": [[22, "distillation-with-hubert"]], "Codebook index preparation": [[22, "codebook-index-preparation"]], "LibriSpeech": [[23, "librispeech"], [35, "librispeech"]], "Pruned transducer statelessX": [[24, "pruned-transducer-statelessx"], [37, "pruned-transducer-statelessx"]], "Usage example": [[24, "usage-example"], [26, "usage-example"], [27, "usage-example"], [36, "usage-example"], [37, "usage-example"], [38, "usage-example"]], "Export Model": [[24, "export-model"], [37, "export-model"], [38, "export-model"]], "Export model using torch.jit.script()": [[24, "export-model-using-torch-jit-script"], [26, "export-model-using-torch-jit-script"], [27, "export-model-using-torch-jit-script"], [37, "export-model-using-torch-jit-script"], [38, "export-model-using-torch-jit-script"]], "Download pretrained models": [[24, "download-pretrained-models"], [26, "download-pretrained-models"], [27, "download-pretrained-models"], [36, "download-pretrained-models"], [37, "download-pretrained-models"], [38, "download-pretrained-models"]], "Deploy with Sherpa": [[24, "deploy-with-sherpa"], [37, "deploy-with-sherpa"], [38, "deploy-with-sherpa"]], "TDNN-LSTM-CTC": [[25, "tdnn-lstm-ctc"], [30, "tdnn-lstm-ctc"]], "Inference with a pre-trained model": [[25, "inference-with-a-pre-trained-model"], [29, "inference-with-a-pre-trained-model"], [30, "inference-with-a-pre-trained-model"], [32, "inference-with-a-pre-trained-model"]], "Zipformer CTC Blank Skip": [[26, "zipformer-ctc-blank-skip"]], "Export models": [[26, "export-models"], [27, "export-models"], [36, "export-models"]], "Zipformer MMI": [[27, "zipformer-mmi"]], "TIMIT": [[28, "timit"]], "TDNN-LiGRU-CTC": [[29, "tdnn-ligru-ctc"]], "YesNo": [[31, "yesno"]], "TDNN-CTC": [[32, "tdnn-ctc"]], "Download kaldifeat": [[32, "download-kaldifeat"]], "Streaming ASR": [[33, "streaming-asr"]], "Introduction": [[34, "introduction"]], "Streaming Conformer": [[34, "streaming-conformer"]], "Streaming Emformer": [[34, "streaming-emformer"]], "LSTM Transducer": [[36, "lstm-transducer"]], "Which model to use": [[36, "which-model-to-use"]], "Export model using torch.jit.trace()": [[36, "export-model-using-torch-jit-trace"], [38, "export-model-using-torch-jit-trace"]], "Export LSTM transducer models for ncnn": [[36, "export-lstm-transducer-models-for-ncnn"]], "Export ConvEmformer transducer models for ncnn": [[36, "export-convemformer-transducer-models-for-ncnn"]], "Simulate streaming decoding": [[37, "simulate-streaming-decoding"], [38, "simulate-streaming-decoding"]], "Real streaming decoding": [[37, "real-streaming-decoding"], [38, "real-streaming-decoding"]], "Zipformer Transducer": [[38, "zipformer-transducer"]], "Recipes": [[39, "recipes"]], "Table of Contents": [[39, null]]}, "indexentries": {}}) |