From 0b656e4e1c82093b90ca037579a5862eaf7483b4 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 20 Aug 2021 15:43:25 +0800 Subject: [PATCH] Add a link to Colab. (#14) It demonstrates the usages of pre-trained models. --- README.md | 7 ++++++ egs/librispeech/ASR/conformer_ctc/README.md | 24 ++++++++++++++----- .../ASR/conformer_ctc/pretrained.py | 2 +- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 91c1f67a9..b49a7f04c 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,13 @@ It should print the path to `icefall`. At present, only LibriSpeech recipe is provided. Please follow [egs/librispeech/ASR/README.md][LibriSpeech] to run it. +## Use Pre-trained models + +See [egs/librispeech/ASR/conformer_ctc/README.md](egs/librispeech/ASR/conformer_ctc/README.md) +for how to use pre-trained models. +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing) + + [LibriSpeech]: egs/librispeech/ASR/README.md [k2-install]: https://k2.readthedocs.io/en/latest/installation/index.html# [k2]: https://github.com/k2-fsa/k2 diff --git a/egs/librispeech/ASR/conformer_ctc/README.md b/egs/librispeech/ASR/conformer_ctc/README.md index a02ec35af..130d21351 100644 --- a/egs/librispeech/ASR/conformer_ctc/README.md +++ b/egs/librispeech/ASR/conformer_ctc/README.md @@ -1,6 +1,8 @@ # How to use a pre-trained model to transcribe a sound file or multiple sound files +(See the bottom of this document for the link to a colab notebook.) 
+ You need to prepare 4 files: - a model checkpoint file, e.g., epoch-20.pt @@ -99,22 +101,25 @@ The command to run decoding with attention decoder rescoring is: /path/to/your/sound3.wav ``` -# Decoding with a pretrained model in action +# Decoding with a pre-trained model in action -We have uploaded a pretrained model to +We have uploaded a pre-trained model to -The following shows the steps about the usage of the provided pretrained model. +The following shows the steps about the usage of the provided pre-trained model. -### (1) Download the pretrained model +### (1) Download the pre-trained model ```bash +sudo apt-get install git-lfs cd /path/to/icefall/egs/librispeech/ASR +git lfs install mkdir tmp cd tmp git clone https://huggingface.co/pkufool/conformer_ctc - ``` +**CAUTION**: You have to install `git-lfs` to download the pre-trained model. + You will find the following files: ``` @@ -165,7 +170,7 @@ tmp - `exp/pretrained.pt` - It contains pretrained model parameters, obtained by averaging + It contains pre-trained model parameters, obtained by averaging checkpoints from `epoch-15.pt` to `epoch-34.pt`. Note: We have removed optimizer `state_dict` to reduce file size. @@ -337,3 +342,10 @@ YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION 2021-08-20 11:20:05,805 INFO [pretrained.py:341] Decoding Done ``` + +**NOTE**: We provide a colab notebook for demonstration. +[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing) + +Due to limited memory provided by Colab, you have to upgrade to Colab Pro to +run `HLG decoding + LM rescoring` and `HLG decoding + LM rescoring + attention decoder rescoring`. +Otherwise, you can only run `HLG decoding` with Colab. 
diff --git a/egs/librispeech/ASR/conformer_ctc/pretrained.py b/egs/librispeech/ASR/conformer_ctc/pretrained.py index fbdeb39b5..c63616d28 100755 --- a/egs/librispeech/ASR/conformer_ctc/pretrained.py +++ b/egs/librispeech/ASR/conformer_ctc/pretrained.py @@ -245,11 +245,11 @@ def main(): if params.method in ["whole-lattice-rescoring", "attention-decoder"]: logging.info(f"Loading G from {params.G}") G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu")) + G = G.to(device) # Add epsilon self-loops to G as we will compose # it with the whole lattice later G = k2.add_epsilon_self_loops(G) G = k2.arc_sort(G) - G = G.to(device) G.lm_scores = G.scores.clone() logging.info("Constructing Fbank computer")