diff --git a/egs/librispeech/ASR/conformer_ctc/README.md b/egs/librispeech/ASR/conformer_ctc/README.md
index 23b51167b..164c3e53e 100644
--- a/egs/librispeech/ASR/conformer_ctc/README.md
+++ b/egs/librispeech/ASR/conformer_ctc/README.md
@@ -1,3 +1,53 @@
+## Introduction
+
 Please visit the icefall documentation
 for how to run this recipe.
+
+## How to compute framewise alignment information
+
+### Step 1: Train a model
+
+Please use `conformer_ctc/train.py` to train a model.
+See the icefall documentation
+for how to do it.
+
+### Step 2: Compute framewise alignment
+
+Run
+
+```
+# Choose a checkpoint and determine the number of checkpoints to average
+epoch=30
+avg=15
+./conformer_ctc/ali.py \
+  --epoch $epoch \
+  --avg $avg \
+  --max-duration 500 \
+  --bucketing-sampler 0 \
+  --full-libri 1 \
+  --exp-dir conformer_ctc/exp \
+  --lang-dir data/lang_bpe_5000 \
+  --ali-dir data/ali_5000
+```
+and you will get four files inside the folder `data/ali_5000`:
+
+```
+$ ls -lh data/ali_5000
+total 546M
+-rw-r--r-- 1 kuangfangjun root 1.1M Sep 28 08:06 test_clean.pt
+-rw-r--r-- 1 kuangfangjun root 1.1M Sep 28 08:07 test_other.pt
+-rw-r--r-- 1 kuangfangjun root 542M Sep 28 11:36 train-960.pt
+-rw-r--r-- 1 kuangfangjun root 2.1M Sep 28 11:38 valid.pt
+```
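+
+The generated `*.pt` files are written by `save_alignments` from
+`icefall.utils`. Assuming they are ordinary `torch.save` archives, you can
+peek at one with `torch.load`. The snippet below is only a minimal,
+untested inspection sketch; the chosen file and the dictionary layout are
+illustrative, so adapt it to whatever `save_alignments` actually stores:
+
+```
+# Minimal sketch: peek into one of the generated alignment files.
+import torch
+
+ali = torch.load("data/ali_5000/valid.pt")
+print(type(ali))
+
+if isinstance(ali, dict):
+    # Show a few top-level keys and one example entry to see how
+    # utterances are mapped to their framewise alignments.
+    print(list(ali.keys())[:5])
+    first_key = next(iter(ali))
+    print(first_key, ali[first_key])
+```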
+
+**Note**: It can take more than 3 hours to compute the alignment
+for the training dataset, which contains 960 * 3 = 2880 hours of data
+(due to 3-fold speed perturbation).
+
+**Caution**: The model parameters in `conformer_ctc/ali.py` have to match those
+in `conformer_ctc/train.py`.
+
+**Caution**: You have to set the parameter `preserve_id` to `True` for `CutMix`.
+Search `./conformer_ctc/asr_datamodule.py` for `preserve_id`.
+
+**TODO:** Add documentation about how to use the extracted alignment in a separate pull request.
diff --git a/egs/librispeech/ASR/conformer_ctc/ali.py b/egs/librispeech/ASR/conformer_ctc/ali.py
index c79c4e277..3d817a8f6 100755
--- a/egs/librispeech/ASR/conformer_ctc/ali.py
+++ b/egs/librispeech/ASR/conformer_ctc/ali.py
@@ -33,6 +33,7 @@ from icefall.utils import (
     AttributeDict,
     encode_supervisions,
     get_alignments,
+    get_env_info,
     save_alignments,
     setup_logger,
 )
@@ -62,7 +63,7 @@ def get_parser():
     parser.add_argument(
         "--lang-dir",
         type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
         help="The lang dir",
     )
@@ -95,6 +96,7 @@ def get_params() -> AttributeDict:
             "use_feat_batchnorm": True,
             "output_beam": 10,
             "use_double_scores": True,
+            "env_info": get_env_info(),
         }
     )
     return params
diff --git a/egs/librispeech/ASR/conformer_ctc/decode.py b/egs/librispeech/ASR/conformer_ctc/decode.py
index 3fb5d262d..bddb832b0 100755
--- a/egs/librispeech/ASR/conformer_ctc/decode.py
+++ b/egs/librispeech/ASR/conformer_ctc/decode.py
@@ -143,7 +143,7 @@ def get_parser():
     parser.add_argument(
         "--lang-dir",
         type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
         help="The lang dir",
     )
diff --git a/egs/librispeech/ASR/conformer_ctc/export.py b/egs/librispeech/ASR/conformer_ctc/export.py
index 8241c84c1..79e026dac 100755
--- a/egs/librispeech/ASR/conformer_ctc/export.py
+++ b/egs/librispeech/ASR/conformer_ctc/export.py
@@ -65,7 +65,7 @@ def get_parser():
     parser.add_argument(
         "--lang-dir",
         type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
         help="""It contains language related input files such as
         "lexicon.txt" """,
     )
diff --git a/egs/librispeech/ASR/conformer_ctc/train.py b/egs/librispeech/ASR/conformer_ctc/train.py
index b76be8641..ae088620f 100755
--- a/egs/librispeech/ASR/conformer_ctc/train.py
+++ b/egs/librispeech/ASR/conformer_ctc/train.py
@@ -115,7 +115,7 @@ def get_parser():
     parser.add_argument(
         "--lang-dir",
         type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
         help="""The lang dir
         It contains language related input files such as
         "lexicon.txt"