Add doc about how to extract framewise alignments.

2021-10-18 14:21:21 +08:00 · 2021-10-18 14:21:21 +08:00 · 07140e5d5c
commit 07140e5d5c
parent 1c603c3bce
5 changed files with 56 additions and 4 deletions
--- a/egs/librispeech/ASR/conformer_ctc/README.md
+++ b/egs/librispeech/ASR/conformer_ctc/README.md
@ -1,3 +1,53 @@
+## Introduction
+
 Please visit
 <https://icefall.readthedocs.io/en/latest/recipes/librispeech/conformer_ctc.html>
 for how to run this recipe.
+
+## How to compute framewise alignment information
+
+### Step 1: Train a model
+
+Please use `conformer_ctc/train.py` to train a model.
+See <https://icefall.readthedocs.io/en/latest/recipes/librispeech/conformer_ctc.html>
+for how to do it.
+
+### Step 2: Compute framewise alignment
+
+Run
+
+```
+# Choose a checkpoint and determine the number of checkpoints to average
+epoch=30
+avg=15
+./conformer_ctc/ali.py \
+  --epoch $epoch \
+  --avg $avg \
+  --max-duration 500 \
+  --bucketing-sampler 0 \
+  --full-libri 1 \
+  --exp-dir conformer_ctc/exp \
+  --lang-dir data/lang_bpe_5000 \
+  --ali-dir data/ali_5000
+```
+and you will get four files inside the folder `data/ali_5000`:
+
+```
+$ ls -lh data/ali_5000
+total 546M
+-rw-r--r-- 1 kuangfangjun root 1.1M Sep 28 08:06 test_clean.pt
+-rw-r--r-- 1 kuangfangjun root 1.1M Sep 28 08:07 test_other.pt
+-rw-r--r-- 1 kuangfangjun root 542M Sep 28 11:36 train-960.pt
+-rw-r--r-- 1 kuangfangjun root 2.1M Sep 28 11:38 valid.pt
+```
+
+**Note**: It can take more than 3 hours to compute the alignment
+for the training dataset, which contains 960 * 3 = 2880 hours of data
+(the original 960 hours plus two speed-perturbed copies).
+
+**Caution**: The model parameters in `conformer_ctc/ali.py` have to match those
+in `conformer_ctc/train.py`.
+
+**Caution**: You have to set the parameter `preserve_id` to `True` for `CutMix`.
+Search `./conformer_ctc/asr_datamodule.py` for `preserve_id`.
+
+**TODO:** Add doc about how to use the extracted alignment in the other pull-request.
--- a/egs/librispeech/ASR/conformer_ctc/ali.py
+++ b/egs/librispeech/ASR/conformer_ctc/ali.py
@ -33,6 +33,7 @@ from icefall.utils import (
    AttributeDict,
    encode_supervisions,
    get_alignments,
+    get_env_info,
    save_alignments,
    setup_logger,
 )
@ -62,7 +63,7 @@ def get_parser():
    parser.add_argument(
        "--lang-dir",
        type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
        help="The lang dir",
    )

@ -95,6 +96,7 @@ def get_params() -> AttributeDict:
            "use_feat_batchnorm": True,
            "output_beam": 10,
            "use_double_scores": True,
+            "env_info": get_env_info(),
        }
    )
    return params
--- a/egs/librispeech/ASR/conformer_ctc/decode.py
+++ b/egs/librispeech/ASR/conformer_ctc/decode.py
@ -143,7 +143,7 @@ def get_parser():
    parser.add_argument(
        "--lang-dir",
        type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
        help="The lang dir",
    )

--- a/egs/librispeech/ASR/conformer_ctc/export.py
+++ b/egs/librispeech/ASR/conformer_ctc/export.py
@ -65,7 +65,7 @@ def get_parser():
    parser.add_argument(
        "--lang-dir",
        type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
        help="""It contains language related input files such as "lexicon.txt"
        """,
    )
--- a/egs/librispeech/ASR/conformer_ctc/train.py
+++ b/egs/librispeech/ASR/conformer_ctc/train.py
@ -115,7 +115,7 @@ def get_parser():
    parser.add_argument(
        "--lang-dir",
        type=str,
-        default="data/lang_bpe",
+        default="data/lang_bpe_5000",
        help="""The lang dir
        It contains language related input files such as
        "lexicon.txt"