From f783e10dc8ab9ae37d401293f63f267d58f8cd16 Mon Sep 17 00:00:00 2001
From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com>
Date: Sat, 7 May 2022 11:09:31 +0800
Subject: [PATCH 1/4] Do some changes for aishell/ASR/transducer
 stateless/export.py (#347)

* do some changes for aishell/ASR/transducer_stateless/export.py
---
 .../ASR/transducer_stateless/export.py        | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/egs/aishell/ASR/transducer_stateless/export.py b/egs/aishell/ASR/transducer_stateless/export.py
index 5687260df..591b333e0 100755
--- a/egs/aishell/ASR/transducer_stateless/export.py
+++ b/egs/aishell/ASR/transducer_stateless/export.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 #
 # Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang)
+#           2022 Xiaomi Corporation (Author: Mingshuang Luo)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -22,7 +23,7 @@
 Usage:
 ./transducer_stateless/export.py \
   --exp-dir ./transducer_stateless/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --lang-dir data/lang_char \
   --epoch 20 \
   --avg 10
 
@@ -33,20 +34,19 @@ To use the generated file with `transducer_stateless/decode.py`, you can do:
     cd /path/to/exp_dir
     ln -s pretrained.pt epoch-9999.pt
 
-    cd /path/to/egs/librispeech/ASR
+    cd /path/to/egs/aishell/ASR
     ./transducer_stateless/decode.py \
         --exp-dir ./transducer_stateless/exp \
         --epoch 9999 \
         --avg 1 \
         --max-duration 1 \
-        --bpe-model data/lang_bpe_500/bpe.model
+        --lang-dir data/lang_char
 """
 
 import argparse
 import logging
 from pathlib import Path
 
-import sentencepiece as spm
 import torch
 import torch.nn as nn
 from conformer import Conformer
@@ -56,6 +56,7 @@ from model import Transducer
 
 from icefall.checkpoint import average_checkpoints, load_checkpoint
 from icefall.env import get_env_info
+from icefall.lexicon import Lexicon
 from icefall.utils import AttributeDict, str2bool
 
 
@@ -91,10 +92,10 @@ def get_parser():
     )
 
     parser.add_argument(
-        "--bpe-model",
+        "--lang-dir",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_char",
+        help="The lang dir",
     )
 
     parser.add_argument(
@@ -194,12 +195,10 @@ def main():
 
     logging.info(f"device: {device}")
 
-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    lexicon = Lexicon(params.lang_dir)
 
-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = 0
+    params.vocab_size = max(lexicon.tokens) + 1
 
     logging.info(params)
 

From 20f092e7098f9809db1ea2ff25a37b17ff4f8237 Mon Sep 17 00:00:00 2001
From: Zengwei Yao <yaozengwei@outlook.com>
Date: Sat, 7 May 2022 13:09:11 +0800
Subject: [PATCH 2/4] Support decoding with averaged model when using --iter
 (#353)

* support decoding with averaged model when using --iter

* minor fix

* minor fix of copyright date
---
 .../pruned_transducer_stateless4/decode.py    | 65 ++++++++++++++-----
 icefall/checkpoint.py                         |  6 +-
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/egs/librispeech/ASR/pruned_transducer_stateless4/decode.py b/egs/librispeech/ASR/pruned_transducer_stateless4/decode.py
index 025ebd7bc..1f4a22213 100755
--- a/egs/librispeech/ASR/pruned_transducer_stateless4/decode.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless4/decode.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 #
-# Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang,
-#                                            Zengwei Yao)
+# Copyright 2021-2022 Xiaomi Corporation (Author: Fangjun Kuang,
+#                                                 Zengwei Yao)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -540,23 +540,52 @@ def main():
             model.to(device)
             model.load_state_dict(average_checkpoints(filenames, device=device))
     else:
-        assert params.iter == 0 and params.avg > 0
-        start = params.epoch - params.avg
-        assert start >= 1
-        filename_start = f"{params.exp_dir}/epoch-{start}.pt"
-        filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt"
-        logging.info(
-            f"Calculating the averaged model over epoch range from "
-            f"{start} (excluded) to {params.epoch}"
-        )
-        model.to(device)
-        model.load_state_dict(
-            average_checkpoints_with_averaged_model(
-                filename_start=filename_start,
-                filename_end=filename_end,
-                device=device,
+        if params.iter > 0:
+            filenames = find_checkpoints(
+                params.exp_dir, iteration=-params.iter
+            )[: params.avg + 1]
+            if len(filenames) == 0:
+                raise ValueError(
+                    f"No checkpoints found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            elif len(filenames) < params.avg + 1:
+                raise ValueError(
+                    f"Not enough checkpoints ({len(filenames)}) found for"
+                    f" --iter {params.iter}, --avg {params.avg}"
+                )
+            filename_start = filenames[-1]
+            filename_end = filenames[0]
+            logging.info(
+                "Calculating the averaged model over iteration checkpoints"
+                f" from {filename_start} (excluded) to {filename_end}"
+            )
+            model.to(device)
+            model.load_state_dict(
+                average_checkpoints_with_averaged_model(
+                    filename_start=filename_start,
+                    filename_end=filename_end,
+                    device=device,
+                )
+            )
+        else:
+            assert params.avg > 0
+            start = params.epoch - params.avg
+            assert start >= 1
+            filename_start = f"{params.exp_dir}/epoch-{start}.pt"
+            filename_end = f"{params.exp_dir}/epoch-{params.epoch}.pt"
+            logging.info(
+                f"Calculating the averaged model over epoch range from "
+                f"{start} (excluded) to {params.epoch}"
+            )
+            model.to(device)
+            model.load_state_dict(
+                average_checkpoints_with_averaged_model(
+                    filename_start=filename_start,
+                    filename_end=filename_end,
+                    device=device,
+                )
             )
-        )
 
     model.to(device)
     model.eval()
diff --git a/icefall/checkpoint.py b/icefall/checkpoint.py
index ba3823ffc..170586455 100644
--- a/icefall/checkpoint.py
+++ b/icefall/checkpoint.py
@@ -1,5 +1,5 @@
-# Copyright      2021  Xiaomi Corp.        (authors: Fangjun Kuang,
-#                                                    Zengwei Yao)
+# Copyright  2021-2022  Xiaomi Corporation  (authors: Fangjun Kuang,
+#                                                     Zengwei Yao)
 #
 # See ../../LICENSE for clarification regarding multiple authors
 #
@@ -405,7 +405,7 @@ def average_checkpoints_with_averaged_model(
     (3) avg = (model_end + model_start * (weight_start / weight_end))
               * weight_end
 
-    The model index could be epoch number or checkpoint number.
+    The model index could be epoch number or iteration number.
 
     Args:
       filename_start:

From cd460f7bf174011ff65a5aec1b68f01a3496781e Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Sat, 7 May 2022 17:18:34 +0800
Subject: [PATCH 3/4] Stringify torch.__version__ before serializing it. (#354)

---
 icefall/env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/icefall/env.py b/icefall/env.py
index c29cbb078..0192d1c11 100644
--- a/icefall/env.py
+++ b/icefall/env.py
@@ -95,7 +95,7 @@ def get_env_info() -> Dict[str, Any]:
         "k2-git-sha1": k2.version.__git_sha1__,
         "k2-git-date": k2.version.__git_date__,
         "lhotse-version": lhotse.__version__,
-        "torch-version": torch.__version__,
+        "torch-version": str(torch.__version__),
         "torch-cuda-available": torch.cuda.is_available(),
         "torch-cuda-version": torch.version.cuda,
         "python-version": sys.version[:3],

From bc284e88e6459423b57fdef80ce4a8aed6122dcc Mon Sep 17 00:00:00 2001
From: Fangjun Kuang <csukuangfj@gmail.com>
Date: Tue, 10 May 2022 14:51:34 +0800
Subject: [PATCH 4/4] Run decode.py in GitHub actions. (#356)

---
 ...k-librispeech-test-clean-and-test-other.sh |  17 +++
 ...peech-test-clean-and-test-other-dataset.sh |  23 ++++
 .github/scripts/install-kaldifeat.sh          |  13 +++
 ...ech-test-clean-and-test-other-manifests.sh |  11 ++
 ...-pruned-transducer-stateless-2022-03-12.sh |  28 +++++
 ...pruned-transducer-stateless2-2022-04-29.sh |  28 +++++
 ...pruned-transducer-stateless3-2022-04-29.sh |  28 +++++
 ...speech-transducer-stateless2-2022-04-19.sh |  28 +++++
 .../workflows/run-librispeech-2022-03-12.yml  |  85 +++++++++++++--
 .../workflows/run-librispeech-2022-04-29.yml  | 101 ++++++++++++++++--
 ...peech-transducer-stateless2-2022-04-19.yml |  84 +++++++++++++--
 .../run-pretrained-conformer-ctc.yml          |   9 +-
 ...-transducer-stateless-librispeech-100h.yml |   9 +-
 ...r-stateless-librispeech-multi-datasets.yml |   9 +-
 ...ransducer-stateless-modified-2-aishell.yml |   9 +-
 ...-transducer-stateless-modified-aishell.yml |   9 +-
 .../run-pretrained-transducer-stateless.yml   |   9 +-
 .../workflows/run-pretrained-transducer.yml   |   7 --
 .../beam_search.py                            |  13 ++-
 19 files changed, 434 insertions(+), 86 deletions(-)
 create mode 100755 .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
 create mode 100755 .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
 create mode 100755 .github/scripts/install-kaldifeat.sh
 create mode 100755 .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh

diff --git a/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh b/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
new file mode 100755
index 000000000..a4a6cd8d7
--- /dev/null
+++ b/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+# This script computes fbank features for the test-clean and test-other datasets.
+# The computed features are saved to ~/tmp/fbank-libri and are
+# cached for later runs
+
+export PYTHONPATH=$PWD:$PYTHONPATH
+echo $PYTHONPATH
+
+mkdir -p ~/tmp/fbank-libri  # -p: tolerate a missing ~/tmp or an already existing directory
+cd egs/librispeech/ASR
+mkdir -p data
+cd data
+[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
+cd ..
+./local/compute_fbank_librispeech.py
+ls -lh data/fbank/
diff --git a/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh b/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
new file mode 100755
index 000000000..3efcc13e3
--- /dev/null
+++ b/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+
+# This script downloads the test-clean and test-other datasets
+# of LibriSpeech and unzips them to the folder ~/tmp/download,
+# which is cached by GitHub actions for later runs.
+#
+# You will find the directory ~/tmp/download/LibriSpeech after running
+# this script.
+
+mkdir -p ~/tmp/download  # -p: tolerate a missing ~/tmp or an already existing directory
+cd egs/librispeech/ASR
+[ ! -e download ] && ln -s ~/tmp/download .  # do not nest a second link on reruns
+cd download
+wget -q --no-check-certificate https://www.openslr.org/resources/12/test-clean.tar.gz
+tar xf test-clean.tar.gz
+rm test-clean.tar.gz
+
+wget -q --no-check-certificate https://www.openslr.org/resources/12/test-other.tar.gz
+tar xf test-other.tar.gz
+rm test-other.tar.gz
+pwd
+ls -lh
+ls -lh LibriSpeech
diff --git a/.github/scripts/install-kaldifeat.sh b/.github/scripts/install-kaldifeat.sh
new file mode 100755
index 000000000..6666a5064
--- /dev/null
+++ b/.github/scripts/install-kaldifeat.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# This script clones kaldifeat into ~/tmp/kaldifeat and builds it there;
+# the directory is cached by GitHub actions for later runs.
+
+mkdir -p ~/tmp
+cd ~/tmp
+git clone https://github.com/csukuangfj/kaldifeat
+cd kaldifeat
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release ..
+make -j2 _kaldifeat
diff --git a/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh b/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
new file mode 100755
index 000000000..e0b87e0fc
--- /dev/null
+++ b/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# This script assumes that test-clean and test-other are downloaded
+# to egs/librispeech/ASR/download/LibriSpeech and generates manifest
+# files in egs/librispeech/ASR/data/manifests
+
+cd egs/librispeech/ASR
+[ ! -e download ] && ln -s ~/tmp/download .
+mkdir -p data/manifests
+lhotse prepare librispeech -j 2 -p test-clean -p test-other ./download/LibriSpeech data/manifests
+ls -lh data/manifests
diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
index 2387a16e2..59e9edf41 100755
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
@@ -45,3 +45,31 @@ for method in modified_beam_search beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p pruned_transducer_stateless/exp
+  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh pruned_transducer_stateless/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search fast_beam_search; do
+    log "Decoding with $method"
+
+    ./pruned_transducer_stateless/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir pruned_transducer_stateless/exp
+  done
+
+  rm pruned_transducer_stateless/exp/*.pt
+fi
diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
index ee8610996..1b62caab8 100755
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
@@ -49,3 +49,31 @@ for method in modified_beam_search beam_search fast_beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p pruned_transducer_stateless2/exp
+  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless2/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh pruned_transducer_stateless2/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search fast_beam_search; do
+    log "Decoding with $method"
+
+    ./pruned_transducer_stateless2/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir pruned_transducer_stateless2/exp
+  done
+
+  rm pruned_transducer_stateless2/exp/*.pt
+fi
diff --git a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
index d28e888e7..1177e5a86 100755
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
@@ -49,3 +49,31 @@ for method in modified_beam_search beam_search fast_beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p pruned_transducer_stateless3/exp
+  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh pruned_transducer_stateless3/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search fast_beam_search; do
+    log "Decoding with $method"
+
+    ./pruned_transducer_stateless3/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir pruned_transducer_stateless3/exp
+  done
+
+  rm pruned_transducer_stateless3/exp/*.pt
+fi
diff --git a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
index 102547c8b..d2a2d3c02 100755
--- a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
+++ b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
@@ -45,3 +45,31 @@ for method in modified_beam_search beam_search; do
     $repo/test_wavs/1221-135766-0001.wav \
     $repo/test_wavs/1221-135766-0002.wav
 done
+
+echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
+if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
+  mkdir -p transducer_stateless2/exp
+  ln -s $PWD/$repo/exp/pretrained.pt transducer_stateless2/exp/epoch-999.pt
+  ln -s $PWD/$repo/data/lang_bpe_500 data/
+
+  ls -lh data
+  ls -lh transducer_stateless2/exp
+
+  log "Decoding test-clean and test-other"
+
+  # use a small value for decoding with CPU
+  max_duration=50
+
+  for method in greedy_search modified_beam_search; do
+    log "Decoding with $method"
+
+    ./transducer_stateless2/decode.py \
+      --decoding-method $method \
+      --epoch 999 \
+      --avg 1 \
+      --max-duration $max_duration \
+      --exp-dir transducer_stateless2/exp
+  done
+
+  rm transducer_stateless2/exp/*.pt
+fi
diff --git a/.github/workflows/run-librispeech-2022-03-12.yml b/.github/workflows/run-librispeech-2022-03-12.yml
index 135285f15..39c6fd24f 100644
--- a/.github/workflows/run-librispeech-2022-03-12.yml
+++ b/.github/workflows/run-librispeech-2022-03-12.yml
@@ -24,9 +24,18 @@ on:
   pull_request:
     types: [labeled]
 
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
 jobs:
   run_librispeech_2022_03_12:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
+    if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -63,20 +72,78 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Cache LibriSpeech test-clean and test-other datasets
+        id: libri-test-clean-and-test-other-data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/download
+          key: cache-libri-test-clean-and-test-other
+
+      - name: Download LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+
+      - name: Prepare manifests for LibriSpeech test-clean and test-other
+        shell: bash
+        run: |
+          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+
+      - name: Cache LibriSpeech test-clean and test-other fbank features
+        id: libri-test-clean-and-test-other-fbank
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/fbank-libri
+          key: cache-libri-fbank-test-clean-and-test-other
+
+      - name: Compute fbank for LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
+
 
       - name: Inference with pre-trained model
         shell: bash
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
         run: |
+          mkdir -p egs/librispeech/ASR/data
+          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
+          ls -lh egs/librispeech/ASR/data/*
+
           sudo apt-get -qq install git-lfs tree sox
           export PYTHONPATH=$PWD:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
+
           .github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
+
+      - name: Display decoding results
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cd egs/librispeech/ASR/
+          tree ./pruned_transducer_stateless/exp
+
+          cd pruned_transducer_stateless
+          echo "results for pruned_transducer_stateless"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===fast_beam_search==="
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+      - name: Upload decoding results for pruned_transducer_stateless
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless-2022-03-12
+          path: egs/librispeech/ASR/pruned_transducer_stateless/exp/
diff --git a/.github/workflows/run-librispeech-2022-04-29.yml b/.github/workflows/run-librispeech-2022-04-29.yml
index 129e30698..ffaee25f1 100644
--- a/.github/workflows/run-librispeech-2022-04-29.yml
+++ b/.github/workflows/run-librispeech-2022-04-29.yml
@@ -24,9 +24,18 @@ on:
   pull_request:
     types: [labeled]
 
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
 jobs:
   run_librispeech_2022_04_29:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
+    if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -63,18 +72,50 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Cache LibriSpeech test-clean and test-other datasets
+        id: libri-test-clean-and-test-other-data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/download
+          key: cache-libri-test-clean-and-test-other
+
+      - name: Download LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+
+      - name: Prepare manifests for LibriSpeech test-clean and test-other
+        shell: bash
+        run: |
+          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+
+      - name: Cache LibriSpeech test-clean and test-other fbank features
+        id: libri-test-clean-and-test-other-fbank
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/fbank-libri
+          key: cache-libri-fbank-test-clean-and-test-other
+
+      - name: Compute fbank for LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
 
       - name: Inference with pre-trained model
         shell: bash
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
         run: |
+          mkdir -p egs/librispeech/ASR/data
+          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
+          ls -lh egs/librispeech/ASR/data/*
+
           sudo apt-get -qq install git-lfs tree sox
           export PYTHONPATH=$PWD:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
@@ -83,3 +124,45 @@ jobs:
           .github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
 
           .github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
+
+      - name: Display decoding results
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cd egs/librispeech/ASR
+          tree pruned_transducer_stateless2/exp
+          cd pruned_transducer_stateless2
+          echo "results for pruned_transducer_stateless2"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===fast_beam_search==="
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          cd ../
+          tree pruned_transducer_stateless3/exp
+          cd pruned_transducer_stateless3
+          echo "results for pruned_transducer_stateless3"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===fast_beam_search==="
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+      - name: Upload decoding results for pruned_transducer_stateless2
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless2-2022-04-29
+          path: egs/librispeech/ASR/pruned_transducer_stateless2/exp/
+
+      - name: Upload decoding results for pruned_transducer_stateless3
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless3-2022-04-29
+          path: egs/librispeech/ASR/pruned_transducer_stateless3/exp/
diff --git a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
index 5871f926d..c52b543d8 100644
--- a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
+++ b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
@@ -24,9 +24,18 @@ on:
   pull_request:
     types: [labeled]
 
+  schedule:
+    # minute (0-59)
+    # hour (0-23)
+    # day of the month (1-31)
+    # month (1-12)
+    # day of the week (0-6)
+    # nightly build at 15:50 UTC time every day
+    - cron: "50 15 * * *"
+
 jobs:
   run_librispeech_2022_04_19:
-    if: github.event.label.name == 'ready' || github.event_name == 'push'
+    if: github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'schedule'
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -63,20 +72,77 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
+
+      - name: Cache LibriSpeech test-clean and test-other datasets
+        id: libri-test-clean-and-test-other-data
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/download
+          key: cache-libri-test-clean-and-test-other
+
+      - name: Download LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+
+      - name: Prepare manifests for LibriSpeech test-clean and test-other
+        shell: bash
+        run: |
+          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+
+      - name: Cache LibriSpeech test-clean and test-other fbank features
+        id: libri-test-clean-and-test-other-fbank
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/tmp/fbank-libri
+          key: cache-libri-fbank-test-clean-and-test-other
+
+      - name: Compute fbank for LibriSpeech test-clean and test-other
+        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
+        shell: bash
+        run: |
+          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
 
       - name: Inference with pre-trained model
         shell: bash
+        env:
+          GITHUB_EVENT_NAME: ${{ github.event_name }}
         run: |
+          mkdir -p egs/librispeech/ASR/data
+          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
+          ls -lh egs/librispeech/ASR/data/*
+
           sudo apt-get -qq install git-lfs tree sox
           export PYTHONPATH=$PWD:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
           export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
+
           .github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
+
+      - name: Display decoding results
+        if: github.event_name == 'schedule'
+        shell: bash
+        run: |
+          cd egs/librispeech/ASR/
+          tree ./transducer_stateless2/exp
+
+          cd transducer_stateless2
+          echo "results for transducer_stateless2"
+          echo "===greedy search==="
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+          echo "===modified_beam_search==="
+          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+
+      - name: Upload decoding results for transducer_stateless2
+        uses: actions/upload-artifact@v2
+        if: github.event_name == 'schedule'
+        with:
+          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-transducer_stateless2-2022-04-19
+          path: egs/librispeech/ASR/transducer_stateless2/exp/
diff --git a/.github/workflows/run-pretrained-conformer-ctc.yml b/.github/workflows/run-pretrained-conformer-ctc.yml
index 6575ceb65..69f15060b 100644
--- a/.github/workflows/run-pretrained-conformer-ctc.yml
+++ b/.github/workflows/run-pretrained-conformer-ctc.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
index 80ab356e6..438f6e882 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
index d2231750c..f50ac2af7 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
index a84e804c6..659dbc9da 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
index 7fa48d15a..f4e56bd6c 100644
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer-stateless.yml b/.github/workflows/run-pretrained-transducer-stateless.yml
index 678e79339..ca355e778 100644
--- a/.github/workflows/run-pretrained-transducer-stateless.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless.yml
@@ -62,14 +62,7 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
         shell: bash
diff --git a/.github/workflows/run-pretrained-transducer.yml b/.github/workflows/run-pretrained-transducer.yml
index 781783bcf..f1b051047 100644
--- a/.github/workflows/run-pretrained-transducer.yml
+++ b/.github/workflows/run-pretrained-transducer.yml
@@ -62,13 +62,6 @@ jobs:
         if: steps.my-cache.outputs.cache-hit != 'true'
         shell: bash
         run: |
-          mkdir -p ~/tmp
-          cd ~/tmp
-          git clone https://github.com/csukuangfj/kaldifeat
-          cd kaldifeat
-          mkdir build
-          cd build
-          cmake -DCMAKE_BUILD_TYPE=Release ..
-          make -j2 _kaldifeat
+          .github/scripts/install-kaldifeat.sh
 
       - name: Inference with pre-trained model
diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py b/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py
index 100aeaa6e..5d1e9b471 100644
--- a/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py
+++ b/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py
@@ -15,6 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import warnings
 from dataclasses import dataclass
 from typing import Dict, List, Optional
 
@@ -565,8 +566,10 @@ def modified_beam_search(
         for i in range(batch_size):
             topk_log_probs, topk_indexes = ragged_log_probs[i].topk(beam)
 
-            topk_hyp_indexes = (topk_indexes // vocab_size).tolist()
-            topk_token_indexes = (topk_indexes % vocab_size).tolist()
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                topk_hyp_indexes = (topk_indexes // vocab_size).tolist()
+                topk_token_indexes = (topk_indexes % vocab_size).tolist()
 
             for k in range(len(topk_hyp_indexes)):
                 hyp_idx = topk_hyp_indexes[k]
@@ -679,8 +682,9 @@ def _deprecated_modified_beam_search(
-        topk_hyp_indexes = topk_indexes // logits.size(-1)
-        topk_token_indexes = topk_indexes % logits.size(-1)
-
-        topk_hyp_indexes = topk_hyp_indexes.tolist()
-        topk_token_indexes = topk_token_indexes.tolist()
+        # Keep the `//` itself inside the filter (as modified_beam_search
+        # does); wrapping only .tolist() leaves the division unguarded.
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            topk_hyp_indexes = (topk_indexes // logits.size(-1)).tolist()
+            topk_token_indexes = (topk_indexes % logits.size(-1)).tolist()
 
         for i in range(len(topk_hyp_indexes)):
             hyp = A[topk_hyp_indexes[i]]