From 0f3d9220d4d5d5ccca5bd75474de553fed3d528b Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Sat, 25 Sep 2021 19:52:56 +0800
Subject: [PATCH] Update comments for the time to compute alignments of
 train-960.

---
 egs/librispeech/ASR/conformer_ctc/ali.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/egs/librispeech/ASR/conformer_ctc/ali.py b/egs/librispeech/ASR/conformer_ctc/ali.py
index 07390f7e7..aa5b6bc88 100755
--- a/egs/librispeech/ASR/conformer_ctc/ali.py
+++ b/egs/librispeech/ASR/conformer_ctc/ali.py
@@ -76,7 +76,7 @@ def get_parser():
     parser.add_argument(
         "--ali-dir",
         type=str,
-        default="data/ali",
+        default="data/ali_500",
         help="The experiment dir",
     )
     return parser
@@ -200,11 +200,15 @@ def main():
 
     assert args.return_cuts is True
     assert args.concatenate_cuts is False
+    if args.full_libri is False:
+        print("Changing --full-libri to True")
+        args.full_libri = True
 
     params = get_params()
     params.update(vars(args))
 
     setup_logger(f"{params.exp_dir}/log/ali")
+
     logging.info("Computing alignment - started")
     logging.info(params)
 
@@ -264,8 +268,19 @@ def main():
         "train-960": train_dl,
         "valid": valid_dl,
     }
+    # For train-960, it takes about 3 hours 40 minutes, i.e., 3.67 hours to
+    # compute the alignments if you use --max-duration=500
+    #
+    # There are 960 * 3 = 2880 hours of data and it takes only
+    # 3 hours 40 minutes to get the alignment.
+    # The RTF is roughly: 3.67 / 2880 = 0.0012743
     for name, dl in enabled_datasets.items():
         logging.info(f"Processing {name}")
+        if name == "train-960":
+            logging.info(
+                f"It will take about 3 hours 40 minutes for {name}, "
+                "which contains 960 * 3 = 2880 hours of data"
+            )
         alignments = compute_alignments(
             model=model,
             dl=dl,