diff --git a/egs/librispeech/ASR/prepare.sh b/egs/librispeech/ASR/prepare.sh
index 3b2678ec4..1bbf7bbcf 100755
--- a/egs/librispeech/ASR/prepare.sh
+++ b/egs/librispeech/ASR/prepare.sh
@@ -60,8 +60,11 @@ log "dl_dir: $dl_dir"
 
 if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
   log "Stage -1: Download LM"
-  [ ! -e $dl_dir/lm ] && mkdir -p $dl_dir/lm
-  ./local/download_lm.py --out-dir=$dl_dir/lm
+  mkdir -p $dl_dir/lm
+  if [ ! -e $dl_dir/lm/.done ]; then
+    ./local/download_lm.py --out-dir=$dl_dir/lm
+    touch $dl_dir/lm/.done
+  fi
 fi
 
 if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
@@ -91,7 +94,10 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
   # We assume that you have downloaded the LibriSpeech corpus
   # to $dl_dir/LibriSpeech
   mkdir -p data/manifests
-  lhotse prepare librispeech -j $nj $dl_dir/LibriSpeech data/manifests
+  if [ ! -e data/manifests/.librispeech.done ]; then
+    lhotse prepare librispeech -j $nj $dl_dir/LibriSpeech data/manifests
+    touch data/manifests/.librispeech.done
+  fi
 fi
 
 if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
@@ -99,19 +105,28 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
   # We assume that you have downloaded the musan corpus
   # to data/musan
   mkdir -p data/manifests
-  lhotse prepare musan $dl_dir/musan data/manifests
+  if [ ! -e data/manifests/.musan.done ]; then
+    lhotse prepare musan $dl_dir/musan data/manifests
+    touch data/manifests/.musan.done
+  fi
 fi
 
 if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
   log "Stage 3: Compute fbank for librispeech"
   mkdir -p data/fbank
-  ./local/compute_fbank_librispeech.py
+  if [ ! -e data/fbank/.librispeech.done ]; then
+    ./local/compute_fbank_librispeech.py
+    touch data/fbank/.librispeech.done
+  fi
 fi
 
 if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
   log "Stage 4: Compute fbank for musan"
   mkdir -p data/fbank
-  ./local/compute_fbank_musan.py
+  if [ ! -e data/fbank/.musan.done ]; then
+    ./local/compute_fbank_musan.py
+    touch data/fbank/.musan.done
+  fi
 fi
 
 if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
diff --git a/egs/librispeech/ASR/transducer_stateless/alignment.py b/egs/librispeech/ASR/transducer_stateless/alignment.py
index c1cd6e3b1..f143611ea 100644
--- a/egs/librispeech/ASR/transducer_stateless/alignment.py
+++ b/egs/librispeech/ASR/transducer_stateless/alignment.py
@@ -29,6 +29,9 @@ from model import Transducer
 # acoustic frame indexes) and the vertical axis is `u` (representing
 # BPE tokens of the transcript).
 #
+# The notations `t` and `u` are from the paper
+# https://arxiv.org/pdf/1211.3711.pdf
+#
 # Beam search is used to find the path with the
 # highest log probabilities.
 #
@@ -37,12 +40,13 @@ from model import Transducer
 # from `./train.py` to train a model that satisfies this assumption.
 
 
-# AlignItem is a node in the lattice, where its
+# AlignItem is the ending node of a path originated from the starting node.
 # len(ys) equals to `t` and pos_u is the u coordinate
 # in the lattice.
 @dataclass
 class AlignItem:
-    # log prob of this item originating from the start item
+    # total log prob of the path that ends at this item.
+    # The path is originated from the starting node.
     log_prob: float
 
     # It contains framewise token alignment
@@ -234,7 +238,7 @@ def force_alignment(
     return ans
 
 
-def get_word_starting_frame(
+def get_word_starting_frames(
     ali: List[int], sp: spm.SentencePieceProcessor
 ) -> List[int]:
     """Get the starting frame of each word from the given token alignments.
diff --git a/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py b/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py
index fed3c121c..99d5b3788 100755
--- a/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py
+++ b/egs/librispeech/ASR/transducer_stateless/test_compute_ali.py
@@ -43,7 +43,7 @@ from pathlib import Path
 
 import sentencepiece as spm
 import torch
-from alignment import get_word_starting_frame
+from alignment import get_word_starting_frames
 from lhotse import CutSet, load_manifest
 from lhotse.dataset import K2SpeechRecognitionDataset, SingleCutSampler
 from lhotse.dataset.collation import collate_custom_field
@@ -135,17 +135,17 @@ def main():
             (cuts[i].features.num_frames - 1) // 2 - 1
         ) // 2 == token_alignment_length[i]
 
-        word_starting_frame = get_word_starting_frame(
+        word_starting_frames = get_word_starting_frames(
             token_alignment[i, : token_alignment_length[i]].tolist(), sp=sp
         )
 
         word_starting_time = [
             "{:.2f}".format(i * frame_shift_in_second)
-            for i in word_starting_frame
+            for i in word_starting_frames
        ]
 
         words = supervisions["text"][i].split()
-        assert len(word_starting_frame) == len(words)
+        assert len(word_starting_frames) == len(words)
         word_starting_time_dict[cuts[i].id] = list(
             zip(words, word_starting_time)
         )