diff --git a/egs/multi_zh-hans/ASR/README.md b/egs/multi_zh-hans/ASR/README.md
index 1f0892a89..537816a5d 100644
--- a/egs/multi_zh-hans/ASR/README.md
+++ b/egs/multi_zh-hans/ASR/README.md
@@ -27,7 +27,7 @@ This recipe includes scripts for training Zipformer model using multiple Chinese
 |MagicData|755|https://www.openslr.org/68/|
 |AliMeeting|100|https://openslr.org/119/|
 |WeNetSpeech|10,000|https://github.com/wenet-e2e/WenetSpeech|
-|KeSpeech|1,542|https://openreview.net/forum?id=b3Zoeq2sCLq|
+|KeSpeech|1,542|https://github.com/KeSpeech/KeSpeech|
 
 # Included Test Sets
 
diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py b/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py
index a0ea24d57..5649d3815 100755
--- a/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py
+++ b/egs/multi_zh-hans/ASR/local/compute_fbank_magicdata.py
@@ -80,7 +80,7 @@ def compute_fbank_magicdata(num_mel_bins: int = 80, speed_perturb: bool = False)
         )
         if "train" in partition and speed_perturb:
             cut_set = (
-                (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1))
+                cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
             )
         cut_set = cut_set.compute_and_store_features(
             extractor=extractor,
@@ -117,6 +117,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
 
     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_magicdata(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py b/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py
index 32dd1d81a..13fdb036e 100755
--- a/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py
+++ b/egs/multi_zh-hans/ASR/local/compute_fbank_primewords.py
@@ -117,6 +117,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
 
     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_primewords(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
diff --git a/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py b/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py
index 34442e787..730806954 100755
--- a/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py
+++ b/egs/multi_zh-hans/ASR/local/compute_fbank_stcmds.py
@@ -80,7 +80,7 @@ def compute_fbank_stcmds(num_mel_bins: int = 80, speed_perturb: bool = False):
         )
         if "train" in partition and speed_perturb:
             cut_set = (
-                (cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1))
+                cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
             )
         cut_set = cut_set.compute_and_store_features(
             extractor=extractor,
@@ -116,6 +116,6 @@ if __name__ == "__main__":
     logging.basicConfig(format=formatter, level=logging.INFO)
 
     args = get_args()
-    compute_fbank_thchs30(
+    compute_fbank_stcmds(
         num_mel_bins=args.num_mel_bins, speed_perturb=args.speed_perturb
     )
diff --git a/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py b/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py
index 1d6934b61..328bb4809 100755
--- a/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py
+++ b/egs/multi_zh-hans/ASR/local/prepare_for_bpe_model.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2021 Xiaomi Corp. (authors: Zengrui Jin)
+# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -15,10 +15,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# This script tokenizes the training transcript by CJK characters
+# and saves the result to transcript_chars.txt, which is used
+# to train the BPE model later.
+
 import argparse
 from pathlib import Path
 
 from tqdm.auto import tqdm
+
 from icefall.utils import tokenize_by_CJK_char
 
 
@@ -52,11 +57,8 @@ def main():
     with open(text, "r", encoding="utf-8") as fin:
         text_lines = fin.readlines()
 
-    tokenized_lines = []
-    for line in tqdm(text_lines, desc="Tokenizing training transcript"):
-        tokenized_lines.append(f"{tokenize_by_CJK_char(line)}\n")
     with open(transcript_path, "w+", encoding="utf-8") as fout:
-        fout.writelines(tokenized_lines)
+        fout.writelines([f"{tokenize_by_CJK_char(line)}\n" for line in text_lines])
 
 
 if __name__ == "__main__":
diff --git a/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py
index 5d871a5c6..c434ead7e 100755
--- a/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py
+++ b/egs/multi_zh-hans/ASR/local/preprocess_kespeech.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 # Copyright 2021 Johns Hopkins University (Piotr Żelasko)
 # Copyright 2021 Xiaomi Corp. (Fangjun Kuang)
+# Copyright 2023 Xiaomi Corp. (Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -32,7 +33,6 @@ from icefall import setup_logger
 
 def normalize_text(
     utt: str,
-    # punct_pattern=re.compile(r"<(COMMA|PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
     punct_pattern=re.compile(r"<(PERIOD|QUESTIONMARK|EXCLAMATIONPOINT)>"),
     whitespace_pattern=re.compile(r"\s\s+"),
 ) -> str:
diff --git a/egs/multi_zh-hans/ASR/local/train_bpe_model.py b/egs/multi_zh-hans/ASR/local/train_bpe_model.py
index b651fc290..976ea0ba8 100755
--- a/egs/multi_zh-hans/ASR/local/train_bpe_model.py
+++ b/egs/multi_zh-hans/ASR/local/train_bpe_model.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 # Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
+# Copyright 2023 Xiaomi Corp. (authors: Zengrui Jin)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
diff --git a/egs/multi_zh-hans/ASR/prepare.sh b/egs/multi_zh-hans/ASR/prepare.sh
index ccc1e5ea4..8bd52b599 100755
--- a/egs/multi_zh-hans/ASR/prepare.sh
+++ b/egs/multi_zh-hans/ASR/prepare.sh
@@ -5,7 +5,6 @@ export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 set -eou pipefail
 
-nj=16
 stage=-1
 stop_stage=100
 num_splits=100
@@ -256,11 +255,12 @@
   log "Stage 12: Prepare KeSpeech"
   if [ ! -d $dl_dir/KeSpeech ]; then
     log "Abort! Please download KeSpeech first."
+    log "KeSpeech download link: https://github.com/KeSpeech/KeSpeech"
   fi
 
   if [ ! -f data/manifests/.kespeech.done ]; then
     mkdir -p data/manifests
-    lhotse prepare kespeech -j $nj $dl_dir/KeSpeech data/manifests/kespeech
+    lhotse prepare kespeech -j 16 $dl_dir/KeSpeech data/manifests/kespeech
     touch data/manifests/.kespeech.done
   fi
 
@@ -303,7 +303,7 @@
 fi
 
 if [ $stage -le 13 ] && [ $stop_stage -ge 13 ]; then
-  log "Stage 13: BPE model training"
+  log "Stage 13: BPE model training (note that we use transcripts of wenetspeech only for BPE training)"
   ./local/prepare_for_bpe_model.py --lang-dir ./data/lang_char --text ./data/lang_char/text
 
   for vocab_size in ${vocab_sizes[@]}; do
@@ -348,7 +348,7 @@
 fi
 
 if [ $stage -le 14 ] && [ $stop_stage -ge 14 ]; then
-  log "Stage 14: Prepare G"
+  log "Stage 14: Prepare G (note that we use ngram lm of wenetspeech only for G preparation)"
 
   if [ -d ../../wenetspeech/ASR/data/lang_char/ ]; then
     cd data
diff --git a/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py b/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py
index 3518eee3f..b1b7bff93 100644
--- a/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py
+++ b/egs/multi_zh-hans/ASR/zipformer/asr_datamodule.py
@@ -322,7 +322,7 @@ class AsrDataModule:
             sampler=train_sampler,
             batch_size=None,
             num_workers=self.args.num_workers,
-            persistent_workers=False,
+            persistent_workers=True,
             worker_init_fn=worker_init_fn,
         )
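
A note on the speed-perturbation hunks above: the outer parentheses that were removed were redundant, because concatenating lhotse CutSets with "+" already yields a single CutSet. A minimal sketch of the tripling idiom, assuming a hypothetical manifest path:

# Sketch of the 3x speed-perturbation idiom from the compute_fbank_* scripts.
from lhotse import CutSet

cut_set = CutSet.from_file("data/fbank/cuts_train.jsonl.gz")  # hypothetical path
cut_set = cut_set + cut_set.perturb_speed(0.9) + cut_set.perturb_speed(1.1)
# The result holds the original cuts plus 0.9x and 1.1x speed variants,
# roughly tripling the training audio before feature extraction.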
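
The prepare_for_bpe_model.py hunks collapse the tokenization loop into a list comprehension around tokenize_by_CJK_char. A hedged sketch of the intended behavior; the sample strings and the expected output are assumptions, not icefall test cases:

# tokenize_by_CJK_char splits CJK characters into individual space-separated
# tokens while leaving non-CJK words intact (behavior inferred from its use
# in this patch; the example strings below are illustrative assumptions).
from icefall.utils import tokenize_by_CJK_char

lines = ["你好hello世界\n", "这是中文\n"]
tokenized = [f"{tokenize_by_CJK_char(line)}\n" for line in lines]
# expected (assumed): ["你 好 hello 世 界\n", "这 是 中 文\n"]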
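
For the asr_datamodule.py hunk: persistent_workers=True keeps DataLoader worker processes alive across epochs instead of re-forking them each epoch, which avoids repeated dataset and sampler re-initialization. A standalone PyTorch sketch; the toy dataset is illustrative, not from the recipe:

import torch
from torch.utils.data import DataLoader, Dataset

class ToyDataset(Dataset):  # illustrative stand-in for the ASR dataset
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return torch.tensor(idx)

dl = DataLoader(
    ToyDataset(),
    batch_size=2,
    num_workers=2,
    persistent_workers=True,  # requires num_workers > 0; workers survive epochs
)
for epoch in range(2):
    for batch in dl:  # the second epoch reuses the same worker processes
        pass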