resolve conflict

marcoyang 2023-06-19 12:31:35 +08:00
commit ad24b4ad9e
587 changed files with 87456 additions and 6558 deletions

View File

@ -13,6 +13,7 @@ per-file-ignores =
egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
egs/librispeech/ASR/conformer_ctc*/*py: E501,
egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
+egs/librispeech/ASR/zipformer/*.py: E501, E203
egs/librispeech/ASR/RESULTS.md: E999,
# invalid escape sequence (caused by TeX formulas), W605

View File

@ -15,5 +15,5 @@ mkdir -p data
cd data
[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
cd ..
-./local/compute_fbank_librispeech.py
+./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
ls -lh data/fbank/
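The --dataset flag added above restricts fbank computation to the named splits instead of processing all of LibriSpeech, which keeps the CI job fast. The same pattern works for other splits, e.g. (illustrative only, not part of this commit):
./local/compute_fbank_librispeech.py --dataset 'dev-clean dev-other'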

View File

@ -25,7 +25,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -18,7 +18,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -1,79 +0,0 @@
#!/usr/bin/env bash
#
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
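# A call such as `log "hello"` prints a line like the following
# (timestamp, file name, and line number are illustrative):
#   2023-06-19 12:31:35 (run-test.sh:42:main) hello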
cd egs/librispeech/ASR
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
git lfs pull --include "data/lang_bpe_500/bpe.model"
cd exp
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
popd
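# Note: because of GIT_LFS_SKIP_SMUDGE=1, only the files fetched via
# `git lfs pull --include` above are downloaded; everything else in the clone
# stays a lightweight LFS pointer (`git lfs ls-files` marks downloaded files
# with '*' if you want to verify).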
log "Display test files"
tree $repo/
soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
log "Install ncnn and pnnx"
# We are using a modified ncnn here; we will try to merge it into the
# official ncnn repo.
git clone https://github.com/csukuangfj/ncnn
pushd ncnn
git submodule init
git submodule update python/pybind11
python3 setup.py bdist_wheel
ls -lh dist/
pip install dist/*.whl
cd tools/pnnx
mkdir build
cd build
cmake -D Python3_EXECUTABLE=/opt/hostedtoolcache/Python/3.8.14/x64/bin/python3 ..
make -j4 pnnx
./src/pnnx || echo "pass"
popd
log "Test exporting to pnnx format"
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
--exp-dir $repo/exp \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
\
--num-encoder-layers 12 \
--chunk-length 32 \
--cnn-module-kernel 31 \
--left-context-length 32 \
--right-context-length 8 \
--memory-size 32
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/encoder_jit_trace-pnnx.pt
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/decoder_jit_trace-pnnx.pt
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/joiner_jit_trace-pnnx.pt
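# Each pnnx run above converts a TorchScript model into an ncnn graph/weight
# pair (*.ncnn.param / *.ncnn.bin); the decoding script below loads exactly
# those files.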
./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav

View File

@ -20,7 +20,6 @@ abs_repo=$(realpath $repo)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
@ -28,63 +27,6 @@ ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
popd
log "Install ncnn and pnnx"
# We are using a modified ncnn here; we will try to merge it into the
# official ncnn repo.
git clone https://github.com/csukuangfj/ncnn
pushd ncnn
git submodule init
git submodule update python/pybind11
python3 setup.py bdist_wheel
ls -lh dist/
pip install dist/*.whl
cd tools/pnnx
mkdir build
cd build
cmake ..
make -j4 pnnx
./src/pnnx || echo "pass"
popd
log "Test exporting to pnnx format"
./lstm_transducer_stateless2/export.py \
--exp-dir $repo/exp \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
--pnnx 1
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/encoder_jit_trace-pnnx.pt
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/decoder_jit_trace-pnnx.pt
./ncnn/tools/pnnx/build/src/pnnx $repo/exp/joiner_jit_trace-pnnx.pt
./lstm_transducer_stateless2/ncnn-decode.py \
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav
./lstm_transducer_stateless2/streaming-ncnn-decode.py \
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav
log "Test exporting with torch.jit.trace()"
./lstm_transducer_stateless2/export.py \
@ -106,47 +48,6 @@ log "Decode with models exported by torch.jit.trace()"
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
log "Test exporting to ONNX"
./lstm_transducer_stateless2/export.py \
--exp-dir $repo/exp \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
--onnx 1
log "Decode with ONNX models "
./lstm_transducer_stateless2/streaming-onnx-decode.py \
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
--encoder-model-filename $repo/exp/encoder.onnx \
--decoder-model-filename $repo/exp/decoder.onnx \
--joiner-model-filename $repo/exp/joiner.onnx \
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
$repo/test_wavs/1089-134686-0001.wav
./lstm_transducer_stateless2/streaming-onnx-decode.py \
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
--encoder-model-filename $repo/exp/encoder.onnx \
--decoder-model-filename $repo/exp/decoder.onnx \
--joiner-model-filename $repo/exp/joiner.onnx \
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
$repo/test_wavs/1221-135766-0001.wav
./lstm_transducer_stateless2/streaming-onnx-decode.py \
--bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
--encoder-model-filename $repo/exp/encoder.onnx \
--decoder-model-filename $repo/exp/decoder.onnx \
--joiner-model-filename $repo/exp/joiner.onnx \
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
$repo/test_wavs/1221-135766-0002.wav
for sym in 1 2 3; do
log "Greedy search with --max-sym-per-frame $sym"

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -23,7 +23,6 @@ popd
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -22,7 +22,6 @@ popd
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
@ -27,14 +26,6 @@ ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
popd
log "Test exporting to ONNX format"
./pruned_transducer_stateless3/export.py \
--exp-dir $repo/exp \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--onnx 1
log "Export to torchscript model"
./pruned_transducer_stateless3/export.py \
@ -51,30 +42,8 @@ log "Export to torchscript model"
--avg 1 \
--jit-trace 1
ls -lh $repo/exp/*.onnx
ls -lh $repo/exp/*.pt
log "Decode with ONNX models"
./pruned_transducer_stateless3/onnx_check.py \
--jit-filename $repo/exp/cpu_jit.pt \
--onnx-encoder-filename $repo/exp/encoder.onnx \
--onnx-decoder-filename $repo/exp/decoder.onnx \
--onnx-joiner-filename $repo/exp/joiner.onnx \
--onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj.onnx \
--onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj.onnx
./pruned_transducer_stateless3/onnx_pretrained.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--encoder-model-filename $repo/exp/encoder.onnx \
--decoder-model-filename $repo/exp/decoder.onnx \
--joiner-model-filename $repo/exp/joiner.onnx \
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
log "Decode with models exported by torch.jit.trace()"
./pruned_transducer_stateless3/jit_pretrained.py \

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
@ -30,15 +29,6 @@ ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd
log "Test exporting to ONNX format"
./pruned_transducer_stateless7/export.py \
--exp-dir $repo/exp \
--use-averaged-model false \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--onnx 1
log "Export to torchscript model"
./pruned_transducer_stateless7/export.py \
--exp-dir $repo/exp \
@ -50,27 +40,6 @@ log "Export to torchscript model"
ls -lh $repo/exp/*.pt
log "Decode with ONNX models"
./pruned_transducer_stateless7/onnx_check.py \
--jit-filename $repo/exp/cpu_jit.pt \
--onnx-encoder-filename $repo/exp/encoder.onnx \
--onnx-decoder-filename $repo/exp/decoder.onnx \
--onnx-joiner-filename $repo/exp/joiner.onnx \
--onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj.onnx \
--onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj.onnx
./pruned_transducer_stateless7/onnx_pretrained.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--encoder-model-filename $repo/exp/encoder.onnx \
--decoder-model-filename $repo/exp/decoder.onnx \
--joiner-model-filename $repo/exp/joiner.onnx \
--joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
--joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
log "Decode with models exported by torch.jit.script()"
./pruned_transducer_stateless7/jit_pretrained.py \

View File

@ -18,7 +18,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
@ -148,4 +147,4 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
done
rm pruned_transducer_stateless7_ctc/exp/*.pt
fi
fi

View File

@ -10,7 +10,7 @@ log() {
cd egs/librispeech/ASR
-repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2022-12-14
+repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
@ -18,7 +18,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -19,16 +19,16 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
-pushd $repo/exp
+pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/cpu_jit.pt"
git lfs pull --include "exp/pretrained.pt"
git lfs pull --include "exp/encoder_jit_trace.pt"
git lfs pull --include "exp/decoder_jit_trace.pt"
git lfs pull --include "exp/joiner_jit_trace.pt"
cd exp
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -0,0 +1,115 @@
#!/usr/bin/env bash
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
cd egs/librispeech/ASR
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
log "Display test files"
tree $repo/
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/jit_script_chunk_16_left_128.pt"
git lfs pull --include "exp/pretrained.pt"
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd
log "Export to torchscript model"
./zipformer/export.py \
--exp-dir $repo/exp \
--use-averaged-model false \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--causal 1 \
--chunk-size 16 \
--left-context-frames 128 \
--epoch 99 \
--avg 1 \
--jit 1
ls -lh $repo/exp/*.pt
log "Decode with models exported by torch.jit.script()"
./zipformer/jit_pretrained_streaming.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \
$repo/test_wavs/1089-134686-0001.wav
for method in greedy_search modified_beam_search fast_beam_search; do
log "$method"
./zipformer/pretrained.py \
--causal 1 \
--chunk-size 16 \
--left-context-frames 128 \
--method $method \
--beam-size 4 \
--checkpoint $repo/exp/pretrained.pt \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p zipformer/exp
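# Expose the downloaded checkpoint under the file name decode.py expects:
# with --epoch 999 --avg 1 below, it loads exactly zipformer/exp/epoch-999.pt.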
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
ls -lh data
ls -lh zipformer/exp
log "Decoding test-clean and test-other"
# use a small max_duration when decoding with CPU
max_duration=100
for method in greedy_search fast_beam_search modified_beam_search; do
log "Simulated streaming decoding with $method"
./zipformer/decode.py \
--causal 1 \
--chunk-size 16 \
--left-context-frames 128 \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--use-averaged-model 0 \
--max-duration $max_duration \
--exp-dir zipformer/exp
done
for method in greedy_search fast_beam_search modified_beam_search; do
log "Chunk-wise streaming decoding with $method"
./zipformer/streaming_decode.py \
--causal 1 \
--chunk-size 16 \
--left-context-frames 128 \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--use-averaged-model 0 \
--max-duration $max_duration \
--exp-dir zipformer/exp
done
rm zipformer/exp/*.pt
fi
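The x"${VAR}" == x"literal" pattern in the test above is a defensive shell idiom: prefixing both sides keeps the comparison well-formed even when a variable is empty or starts with a dash. Inside bash's [[ ]] the prefix is redundant, so an equivalent plain form would be (a sketch only, not a proposed change to the script):
if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  echo "run full decoding"
fi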

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -0,0 +1,93 @@
#!/usr/bin/env bash
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
cd egs/librispeech/ASR
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
log "Display test files"
tree $repo/
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/jit_script.pt"
git lfs pull --include "exp/pretrained.pt"
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd
log "Export to torchscript model"
./zipformer/export.py \
--exp-dir $repo/exp \
--use-averaged-model false \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--jit 1
ls -lh $repo/exp/*.pt
log "Decode with models exported by torch.jit.script()"
./zipformer/jit_pretrained.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--nn-model-filename $repo/exp/jit_script.pt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
for method in greedy_search modified_beam_search fast_beam_search; do
log "$method"
./zipformer/pretrained.py \
--method $method \
--beam-size 4 \
--checkpoint $repo/exp/pretrained.pt \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p zipformer/exp
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
ls -lh data
ls -lh zipformer/exp
log "Decoding test-clean and test-other"
# use a small max_duration when decoding with CPU
max_duration=100
for method in greedy_search fast_beam_search modified_beam_search; do
log "Decoding with $method"
./zipformer/decode.py \
--decoding-method $method \
--epoch 999 \
--avg 1 \
--use-averaged-model 0 \
--max-duration $max_duration \
--exp-dir zipformer/exp
done
rm zipformer/exp/*.pt
fi

View File

@ -0,0 +1,117 @@
#!/usr/bin/env bash
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
cd egs/librispeech/ASR
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
log "Display test files"
tree $repo/
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "data/lang_bpe_500/HLG.pt"
git lfs pull --include "data/lang_bpe_500/L.pt"
git lfs pull --include "data/lang_bpe_500/LG.pt"
git lfs pull --include "data/lang_bpe_500/Linv.pt"
git lfs pull --include "data/lm/G_4_gram.pt"
git lfs pull --include "exp/jit_script.pt"
git lfs pull --include "exp/pretrained.pt"
ln -s pretrained.pt epoch-99.pt
ls -lh *.pt
popd
log "Export to torchscript model"
./zipformer/export.py \
--exp-dir $repo/exp \
--use-transducer 1 \
--use-ctc 1 \
--use-averaged-model false \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--jit 1
ls -lh $repo/exp/*.pt
log "Decode with models exported by torch.jit.script()"
for method in ctc-decoding 1best; do
./zipformer/jit_pretrained_ctc.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--model-filename $repo/exp/jit_script.pt \
--HLG $repo/data/lang_bpe_500/HLG.pt \
--words-file $repo/data/lang_bpe_500/words.txt \
--G $repo/data/lm/G_4_gram.pt \
--method $method \
--sample-rate 16000 \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done
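# ctc-decoding above searches a CTC topology built from the BPE tokens alone,
# while 1best searches the HLG graph pulled earlier (hence the --HLG and
# --words-file arguments).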
for method in ctc-decoding 1best; do
log "$method"
./zipformer/pretrained_ctc.py \
--use-transducer 1 \
--use-ctc 1 \
--method $method \
--checkpoint $repo/exp/pretrained.pt \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--words-file $repo/data/lang_bpe_500/words.txt \
--HLG $repo/data/lang_bpe_500/HLG.pt \
--G $repo/data/lm/G_4_gram.pt \
--words-file $repo/data/lang_bpe_500/words.txt \
--sample-rate 16000 \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
done
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p zipformer/exp
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
ls -lh data
ls -lh zipformer/exp
log "Decoding test-clean and test-other"
# use a small max_duration when decoding with CPU
max_duration=100
for method in ctc-decoding 1best; do
log "Decoding with $method"
./zipformer/ctc_decode.py \
--use-transducer 1 \
--use-ctc 1 \
--decoding-method $method \
--nbest-scale 1.0 \
--hlg-scale 0.6 \
--epoch 999 \
--avg 1 \
--use-averaged-model 0 \
--max-duration $max_duration \
--exp-dir zipformer/exp
done
rm zipformer/exp/*.pt
fi

View File

@ -18,7 +18,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.flac
ls -lh $repo/test_wavs/*.flac
log "CTC decoding"

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
for sym in 1 2 3; do

View File

@ -19,7 +19,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
log "Beam search decoding"

View File

@ -20,7 +20,6 @@ repo=$(basename $repo_url)
log "Display test files"
tree $repo/
-soxi $repo/test_wavs/*.wav
ls -lh $repo/test_wavs/*.wav
pushd $repo/exp

.github/scripts/test-ncnn-export.sh vendored Executable file
View File

@ -0,0 +1,234 @@
#!/usr/bin/env bash
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
pushd egs/librispeech/ASR
log "Install ncnn and pnnx"
# We are using a modified ncnn here; we will try to merge it into the
# official ncnn repo.
git clone https://github.com/csukuangfj/ncnn
pushd ncnn
git submodule init
git submodule update python/pybind11
python3 setup.py bdist_wheel
ls -lh dist/
pip install dist/*.whl
cd tools/pnnx
mkdir build
cd build
echo "which python3"
which python3
#/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3
cmake -D Python3_EXECUTABLE=$(which python3) ..
make -j4 pnnx
./src/pnnx || echo "pass"
popd
export PATH=$PWD/ncnn/tools/pnnx/build/src:$PATH
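# With the pnnx build directory on PATH, the bare `pnnx` invocations below
# resolve without spelling out the full ./ncnn/tools/pnnx/build/src path.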
log "=========================================================================="
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
cd exp
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
popd
log "Export via torch.jit.trace()"
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
--exp-dir $repo/exp \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
\
--num-encoder-layers 12 \
--chunk-length 32 \
--cnn-module-kernel 31 \
--left-context-length 32 \
--right-context-length 8 \
--memory-size 32
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
cd exp
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
popd
log "Export via torch.jit.trace()"
./lstm_transducer_stateless2/export-for-ncnn.py \
--exp-dir $repo/exp \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav
python3 ./lstm_transducer_stateless2/ncnn-decode.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained.pt"
cd exp
ln -s pretrained.pt epoch-99.pt
popd
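# The architecture flags passed below must match the configuration this
# checkpoint was trained with; otherwise the state dict will not load.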
./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--exp-dir $repo/exp \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
\
--decode-chunk-len 32 \
--num-encoder-layers "2,4,3,2,4" \
--feedforward-dims "1024,1024,2048,2048,1024" \
--nhead "8,8,8,8,8" \
--encoder-dims "384,384,384,384,384" \
--attention-dims "192,192,192,192,192" \
--encoder-unmasked-dims "256,256,256,256,256" \
--zipformer-downsampling-factors "1,2,4,8,2" \
--cnn-module-kernels "31,31,31,31,31" \
--decoder-dim 512 \
--joiner-dim 512
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
--tokens $repo/data/lang_bpe_500/tokens.txt \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/1089-134686-0001.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_char_bpe/L.pt"
git lfs pull --include "data/lang_char_bpe/L_disambig.pt"
git lfs pull --include "data/lang_char_bpe/Linv.pt"
git lfs pull --include "exp/pretrained.pt"
cd exp
ln -s pretrained.pt epoch-99.pt
popd
./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
--lang-dir $repo/data/lang_char_bpe \
--exp-dir $repo/exp \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--decode-chunk-len 32 \
--num-encoder-layers "2,4,3,2,4" \
--feedforward-dims "1024,1024,1536,1536,1024" \
--nhead "8,8,8,8,8" \
--encoder-dims "384,384,384,384,384" \
--attention-dims "192,192,192,192,192" \
--encoder-unmasked-dims "256,256,256,256,256" \
--zipformer-downsampling-factors "1,2,4,8,2" \
--cnn-module-kernels "31,31,31,31,31" \
--decoder-dim 512 \
--joiner-dim 512
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
--tokens $repo/data/lang_char_bpe/tokens.txt \
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
$repo/test_wavs/0.wav
rm -rf $repo
log "--------------------------------------------------------------------------"

.github/scripts/test-onnx-export.sh vendored Executable file
View File

@ -0,0 +1,351 @@
#!/usr/bin/env bash
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
cd egs/librispeech/ASR
log "=========================================================================="
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained.pt"
cd exp
ln -s pretrained.pt epoch-99.pt
popd
log "Export via torch.jit.trace()"
./pruned_transducer_stateless7_streaming/jit_trace_export.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--decode-chunk-len 32 \
--exp-dir $repo/exp/
log "Test exporting to ONNX format"
./pruned_transducer_stateless7_streaming/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--decode-chunk-len 32 \
--exp-dir $repo/exp/
ls -lh $repo/exp
log "Run onnx_check.py"
./pruned_transducer_stateless7_streaming/onnx_check.py \
--jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
--jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
--jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
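# onnx_check.py runs the torch.jit.trace() models and the exported ONNX models
# on the same inputs and verifies that their outputs agree within tolerance.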
log "Run onnx_pretrained.py"
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
log "Downloading pre-trained model from $repo_url"
git lfs install
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
cd exp
ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
popd
log "Export via torch.jit.script()"
./pruned_transducer_stateless3/export.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 9999 \
--avg 1 \
--exp-dir $repo/exp/ \
--jit 1
log "Test exporting to ONNX format"
./pruned_transducer_stateless3/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 9999 \
--avg 1 \
--exp-dir $repo/exp/
ls -lh $repo/exp
log "Run onnx_check.py"
./pruned_transducer_stateless3/onnx_check.py \
--jit-filename $repo/exp/cpu_jit.pt \
--onnx-encoder-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
--onnx-decoder-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
--onnx-joiner-filename $repo/exp/joiner-epoch-9999-avg-1.onnx
log "Run onnx_pretrained.py"
./pruned_transducer_stateless3/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
cd exp
ln -s pretrained-epoch-39-avg-7.pt epoch-99.pt
popd
log "Export via torch.jit.script()"
./pruned_transducer_stateless5/export.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
--exp-dir $repo/exp \
--num-encoder-layers 18 \
--dim-feedforward 2048 \
--nhead 8 \
--encoder-dim 512 \
--decoder-dim 512 \
--joiner-dim 512 \
--jit 1
log "Test exporting to ONNX format"
./pruned_transducer_stateless5/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
--exp-dir $repo/exp \
--num-encoder-layers 18 \
--dim-feedforward 2048 \
--nhead 8 \
--encoder-dim 512 \
--decoder-dim 512 \
--joiner-dim 512
ls -lh $repo/exp
log "Run onnx_check.py"
./pruned_transducer_stateless5/onnx_check.py \
--jit-filename $repo/exp/cpu_jit.pt \
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
log "Run onnx_pretrained.py"
./pruned_transducer_stateless5/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=
rm -rf $repo
log "--------------------------------------------------------------------------"
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained.pt"
cd exp
ln -s pretrained.pt epoch-99.pt
popd
log "Export via torch.jit.script()"
./pruned_transducer_stateless7/export.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--feedforward-dims "1024,1024,2048,2048,1024" \
--jit 1
log "Test exporting to ONNX format"
./pruned_transducer_stateless7/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--feedforward-dims "1024,1024,2048,2048,1024"
ls -lh $repo/exp
log "Run onnx_check.py"
./pruned_transducer_stateless7/onnx_check.py \
--jit-filename $repo/exp/cpu_jit.pt \
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
log "Run onnx_pretrained.py"
./pruned_transducer_stateless7/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav
log "=========================================================================="
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
cd exp
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
popd
log "Test exporting to ONNX format"
./conv_emformer_transducer_stateless2/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--num-encoder-layers 12 \
--chunk-length 32 \
--cnn-module-kernel 31 \
--left-context-length 32 \
--right-context-length 8 \
--memory-size 32
log "Run onnx_pretrained.py"
./conv_emformer_transducer_stateless2/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1221-135766-0001.wav
rm -rf $repo
log "--------------------------------------------------------------------------"
log "=========================================================================="
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
cd exp
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
popd
log "Export via torch.jit.trace()"
./lstm_transducer_stateless2/export.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp/ \
--jit-trace 1
log "Test exporting to ONNX format"
./lstm_transducer_stateless2/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp
ls -lh $repo/exp
log "Run onnx_check.py"
./lstm_transducer_stateless2/onnx_check.py \
--jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
--jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
--jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
log "Run onnx_pretrained.py"
./lstm_transducer_stateless2/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1221-135766-0001.wav
rm -rf $repo
log "--------------------------------------------------------------------------"

View File

@ -65,7 +65,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -73,7 +73,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -87,7 +87,7 @@ jobs:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -39,7 +39,7 @@ concurrency:
jobs:
run_librispeech_2022_11_11_zipformer:
-if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
+if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -60,7 +60,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -68,7 +68,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -119,7 +119,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -35,7 +35,7 @@ on:
jobs:
run_librispeech_2022_12_15_zipformer_ctc_bs:
-if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
+if: github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
@ -60,7 +60,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -68,7 +68,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -119,7 +119,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -22,7 +22,7 @@ concurrency:
jobs:
run_librispeech_lstm_transducer_stateless2_2022_09_03:
-if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'ncnn' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
+if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
@ -47,7 +47,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -55,7 +55,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -106,7 +106,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -39,7 +39,7 @@ concurrency:
jobs:
run_librispeech_pruned_transducer_stateless3_2022_05_13:
-if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
+if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
-pip install --no-binary protobuf protobuf
+pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
-key: cache-tmp-${{ matrix.python-version }}-2022-09-25
+key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
-sudo apt-get -qq install git-lfs tree sox
+sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -0,0 +1,174 @@
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-librispeech-streaming-zipformer-2023-05-18
# zipformer
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
concurrency:
group: run_librispeech_2023_05_18_streaming_zipformer-${{ github.ref }}
cancel-in-progress: true
jobs:
run_librispeech_2023_05_18_streaming_zipformer:
if: github.event.label.name == 'zipformer' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Cache LibriSpeech test-clean and test-other datasets
id: libri-test-clean-and-test-other-data
uses: actions/cache@v2
with:
path: |
~/tmp/download
key: cache-libri-test-clean-and-test-other
- name: Download LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
- name: Prepare manifests for LibriSpeech test-clean and test-other
shell: bash
run: |
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
- name: Cache LibriSpeech test-clean and test-other fbank features
id: libri-test-clean-and-test-other-fbank
uses: actions/cache@v2
with:
path: |
~/tmp/fbank-libri
key: cache-libri-fbank-test-clean-and-test-other-v2
- name: Compute fbank for LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
mkdir -p egs/librispeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-librispeech-streaming-zipformer-2023-05-18.sh
- name: Display decoding results for librispeech zipformer
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
shell: bash
run: |
cd egs/librispeech/ASR/
tree ./zipformer/exp
cd zipformer
echo "results for zipformer, simulated streaming decoding"
echo "===greedy search==="
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===fast_beam_search==="
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===modified beam search==="
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "results for zipformer, chunk-wise streaming decoding"
echo "===greedy search==="
find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/streaming/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===fast_beam_search==="
find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/streaming/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===modified beam search==="
find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/streaming/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Upload decoding results for librispeech zipformer
uses: actions/upload-artifact@v2
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer-2022-11-11
path: egs/librispeech/ASR/zipformer/exp/

View File

@ -64,7 +64,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -72,7 +72,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -123,7 +123,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -0,0 +1,159 @@
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-librispeech-zipformer-2023-05-18
# zipformer
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
concurrency:
group: run_librispeech_2023_05_18_zipformer-${{ github.ref }}
cancel-in-progress: true
jobs:
run_librispeech_2023_05_18_zipformer:
if: github.event.label.name == 'zipformer' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Cache LibriSpeech test-clean and test-other datasets
id: libri-test-clean-and-test-other-data
uses: actions/cache@v2
with:
path: |
~/tmp/download
key: cache-libri-test-clean-and-test-other
- name: Download LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
- name: Prepare manifests for LibriSpeech test-clean and test-other
shell: bash
run: |
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
- name: Cache LibriSpeech test-clean and test-other fbank features
id: libri-test-clean-and-test-other-fbank
uses: actions/cache@v2
with:
path: |
~/tmp/fbank-libri
key: cache-libri-fbank-test-clean-and-test-other-v2
- name: Compute fbank for LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
mkdir -p egs/librispeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-librispeech-zipformer-2023-05-18.sh
- name: Display decoding results for librispeech zipformer
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
shell: bash
run: |
cd egs/librispeech/ASR/
tree ./zipformer/exp
cd zipformer
echo "results for zipformer"
echo "===greedy search==="
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===fast_beam_search==="
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===modified beam search==="
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Upload decoding results for librispeech zipformer
uses: actions/upload-artifact@v2
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer-2022-11-11
path: egs/librispeech/ASR/zipformer/exp/

View File

@ -0,0 +1,155 @@
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
# See ../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: run-librispeech-zipformer-ctc-2023-06-14
# zipformer
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
concurrency:
group: run_librispeech_2023_06_14_zipformer-ctc-${{ github.ref }}
cancel-in-progress: true
jobs:
run_librispeech_2023_06_14_zipformer_ctc:
if: github.event.label.name == 'zipformer' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Cache LibriSpeech test-clean and test-other datasets
id: libri-test-clean-and-test-other-data
uses: actions/cache@v2
with:
path: |
~/tmp/download
key: cache-libri-test-clean-and-test-other
- name: Download LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
- name: Prepare manifests for LibriSpeech test-clean and test-other
shell: bash
run: |
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
- name: Cache LibriSpeech test-clean and test-other fbank features
id: libri-test-clean-and-test-other-fbank
uses: actions/cache@v2
with:
path: |
~/tmp/fbank-libri
key: cache-libri-fbank-test-clean-and-test-other-v2
- name: Compute fbank for LibriSpeech test-clean and test-other
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
- name: Inference with pre-trained model
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
mkdir -p egs/librispeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-librispeech-zipformer-ctc-2023-06-14.sh
- name: Display decoding results for librispeech zipformer
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
shell: bash
run: |
cd egs/librispeech/ASR/
tree ./zipformer/exp
cd zipformer
echo "results for zipformer"
echo "===ctc-decoding==="
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
echo "===1best==="
find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
- name: Upload decoding results for librispeech zipformer
uses: actions/upload-artifact@v2
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer-2022-11-11
path: egs/librispeech/ASR/zipformer/exp/

View File

@ -54,7 +54,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -62,7 +62,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -73,7 +73,7 @@ jobs:
- name: Inference with pre-trained model
shell: bash
run: |
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -63,7 +63,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -71,7 +71,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -122,7 +122,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -63,7 +63,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -71,7 +71,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -122,7 +122,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -54,7 +54,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -62,7 +62,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -73,7 +73,7 @@ jobs:
- name: Inference with pre-trained model
shell: bash
run: |
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -54,7 +54,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -62,7 +62,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -73,7 +73,7 @@ jobs:
- name: Inference with pre-trained model
shell: bash
run: |
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -63,7 +63,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -71,7 +71,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -122,7 +122,7 @@ jobs:
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -54,7 +54,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -62,7 +62,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -73,7 +73,7 @@ jobs:
- name: Inference with pre-trained model
shell: bash
run: |
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -47,7 +47,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Prepare data
shell: bash

View File

@ -54,7 +54,7 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -62,7 +62,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -76,7 +76,7 @@ jobs:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
sudo apt-get -qq install git-lfs tree sox
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

View File

@ -35,7 +35,7 @@ jobs:
matrix:
# os: [ubuntu-18.04, macos-10.15]
# TODO: enable macOS for CPU testing
os: [ubuntu-18.04]
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
@ -67,7 +67,9 @@ jobs:
run: |
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
pip install --no-deps --force-reinstall https://huggingface.co/csukuangfj/k2/resolve/main/cpu/k2-1.24.3.dev20230508+cpu.torch1.13.1-cp38-cp38-linux_x86_64.whl
- name: Run yesno recipe
shell: bash

View File

@ -1,4 +1,4 @@
name: run-librispeech-conv-emformer-transducer-stateless2-2022-12-05
name: test-ncnn-export
on:
push:
@ -16,15 +16,18 @@ on:
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
concurrency:
group: test_ncnn_export-${{ github.ref }}
cancel-in-progress: true
jobs:
run_librispeech_conv_emformer_transducer_stateless2_2022_12_05:
test_ncnn_export:
if: github.event.label.name == 'ready' || github.event.label.name == 'ncnn' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
@ -41,9 +44,9 @@ jobs:
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
@ -51,7 +54,7 @@ jobs:
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2022-09-25
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
@ -59,19 +62,14 @@ jobs:
run: |
.github/scripts/install-kaldifeat.sh
- name: Inference with pre-trained model
- name: Test ncnn export
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
mkdir -p egs/librispeech/ASR/data
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
ls -lh egs/librispeech/ASR/data/*
sudo apt-get -qq install git-lfs tree sox
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/run-librispeech-conv-emformer-transducer-stateless2-2022-12-05.sh
.github/scripts/test-ncnn-export.sh

75
.github/workflows/test-onnx-export.yml vendored Normal file
View File

@ -0,0 +1,75 @@
name: test-onnx-export
on:
push:
branches:
- master
pull_request:
types: [labeled]
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC time every day
- cron: "50 15 * * *"
concurrency:
group: test_onnx_export-${{ github.ref }}
cancel-in-progress: true
jobs:
test_onnx_export:
if: github.event.label.name == 'ready' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python-version: [3.8]
fail-fast: false
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
- name: Test ONNX export
shell: bash
env:
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
run: |
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
.github/scripts/test-onnx-export.sh

View File

@ -56,7 +56,7 @@ jobs:
run: |
sudo apt update
sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
sudo apt install -q -y --fix-missing libsox-dev libsox-fmt-all
- name: Install Python dependencies
run: |
@ -70,7 +70,7 @@ jobs:
pip install git+https://github.com/lhotse-speech/lhotse
# icefall requirements
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf
pip install --no-binary protobuf protobuf==3.20.*
pip install kaldifst
pip install onnxruntime
@ -113,14 +113,15 @@ jobs:
cd ../pruned_transducer_stateless4
pytest -v -s
echo $PYTHONPATH
cd ../pruned_transducer_stateless7
pytest -v -s
cd ../transducer_stateless
pytest -v -s
cd ../transducer
pytest -v -s
# cd ../transducer
# pytest -v -s
cd ../transducer_stateless2
pytest -v -s
@ -157,8 +158,8 @@ jobs:
cd ../transducer_stateless
pytest -v -s
cd ../transducer
pytest -v -s
# cd ../transducer
# pytest -v -s
cd ../transducer_stateless2
pytest -v -s

View File

@ -26,7 +26,7 @@ repos:
# E121,E123,E126,E226,E24,E704,W503,W504
- repo: https://github.com/pycqa/isort
rev: 5.10.1
rev: 5.11.5
hooks:
- id: isort
args: ["--profile=black"]

View File

@ -1,13 +1,4 @@
Legal Notices
NOTE (this is not from the Apache License): The copyright model is that
authors (or their employers, if noted in individual files) own their
individual contributions. The authors' contributions can be discerned
from the git history.
-------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

161
README.md
View File

@ -28,14 +28,15 @@ We provide the following recipes:
- [yesno][yesno]
- [LibriSpeech][librispeech]
- [GigaSpeech][gigaspeech]
- [Aishell][aishell]
- [Aishell2][aishell2]
- [Aishell4][aishell4]
- [TIMIT][timit]
- [TED-LIUM3][tedlium3]
- [GigaSpeech][gigaspeech]
- [Aidatatang_200zh][aidatatang_200zh]
- [WenetSpeech][wenetspeech]
- [Alimeeting][alimeeting]
- [Aishell4][aishell4]
- [TAL_CSASR][tal_csasr]
### yesno
@ -46,9 +47,7 @@ Training takes less than 30 seconds and gives you the following WER:
```
[test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
```
We do provide a Colab notebook for this recipe.
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing)
We provide a Colab notebook for this recipe: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing)
### LibriSpeech
@ -56,12 +55,13 @@ We do provide a Colab notebook for this recipe.
Please see <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md>
for the **latest** results.
We provide 4 models for this recipe:
We provide 5 models for this recipe:
- [conformer CTC model][LibriSpeech_conformer_ctc]
- [TDNN LSTM CTC model][LibriSpeech_tdnn_lstm_ctc]
- [Transducer: Conformer encoder + LSTM decoder][LibriSpeech_transducer]
- [Transducer: Conformer encoder + Embedding decoder][LibriSpeech_transducer_stateless]
- [Transducer: Zipformer encoder + Embedding decoder][LibriSpeech_zipformer]
#### Conformer CTC Model
@ -82,7 +82,7 @@ The WER for this model is:
|-----|------------|------------|
| WER | 6.59 | 17.69 |
We provide a Colab notebook to run a pre-trained TDNN LSTM CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1-iSfQMp2So-We_Uu49N4AAcMInB72u9z?usp=sharing)
We provide a Colab notebook to run a pre-trained TDNN LSTM CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1-iSfQMp2So-We_Uu49N4AAcMInB72u9z?usp=sharing)
#### Transducer: Conformer encoder + LSTM decoder
@ -116,21 +116,58 @@ We provide a Colab notebook to run a pre-trained transducer conformer + stateles
#### k2 pruned RNN-T
| | test-clean | test-other |
|-----|------------|------------|
| WER | 2.57 | 5.95 |
| Encoder | Params | test-clean | test-other |
|-----------------|--------|------------|------------|
| zipformer | 65.5M | 2.21 | 4.91 |
| zipformer-small | 23.2M | 2.46 | 5.83 |
| zipformer-large | 148.4M | 2.11 | 4.77 |
Note: No auxiliary losses are used in the training and no LMs are used
in the decoding.
#### k2 pruned RNN-T + GigaSpeech
| | test-clean | test-other |
|-----|------------|------------|
| WER | 2.00 | 4.63 |
| WER | 1.78 | 4.08 |
Note: No auxiliary losses are used in the training and no LMs are used
in the decoding.
#### k2 pruned RNN-T + GigaSpeech + CommonVoice
| | test-clean | test-other |
|-----|------------|------------|
| WER | 1.90 | 3.98 |
Note: No auxiliary losses are used in the training and no LMs are used
in the decoding.
### GigaSpeech
We provide two models for this recipe: [Conformer CTC model][GigaSpeech_conformer_ctc]
and [Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss][GigaSpeech_pruned_transducer_stateless2].
#### Conformer CTC
| | Dev | Test |
|-----|-------|-------|
| WER | 10.47 | 10.58 |
#### Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss
| | Dev | Test |
|----------------------|-------|-------|
| greedy search | 10.51 | 10.73 |
| fast beam search | 10.50 | 10.69 |
| modified beam search | 10.40 | 10.51 |
### Aishell
We provide two models for this recipe: [conformer CTC model][Aishell_conformer_ctc]
and [TDNN LSTM CTC model][Aishell_tdnn_lstm_ctc].
We provide three models for this recipe: [conformer CTC model][Aishell_conformer_ctc],
[TDNN LSTM CTC model][Aishell_tdnn_lstm_ctc], and [Transducer Stateless Model][Aishell_pruned_transducer_stateless7].
#### Conformer CTC Model
@ -140,20 +177,6 @@ The best CER we currently have is:
|-----|------|
| CER | 4.26 |
We provide a Colab notebook to run a pre-trained conformer CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1WnG17io5HEZ0Gn_cnh_VzK5QYOoiiklC?usp=sharing)
#### Transducer Stateless Model
The best CER we currently have is:
| | test |
|-----|------|
| CER | 4.68 |
We provide a Colab notebook to run a pre-trained TransducerStateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14XaT2MhnBkK-3_RqqWq3K90Xlbin-GZC?usp=sharing)
#### TDNN LSTM CTC Model
The CER for this model is:
@ -164,6 +187,46 @@ The CER for this model is:
We provide a Colab notebook to run a pre-trained TDNN LSTM CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1jbyzYq3ytm6j2nlEt-diQm-6QVWyDDEa?usp=sharing)
#### Transducer Stateless Model
The best CER we currently have is:
| | test |
|-----|------|
| CER | 4.38 |
We provide a Colab notebook to run a pre-trained TransducerStateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14XaT2MhnBkK-3_RqqWq3K90Xlbin-GZC?usp=sharing)
### Aishell2
We provide one model for this recipe: [Transducer Stateless Model][Aishell2_pruned_transducer_stateless5].
#### Transducer Stateless Model
The best WER we currently have is:
| | dev-ios | test-ios |
|-----|------------|------------|
| WER | 5.32 | 5.56 |
### Aishell4
We provide one model for this recipe: [Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss][Aishell4_pruned_transducer_stateless5].
#### Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss (trained with all subsets)
The best CER we currently have is:
| | test |
|-----|------------|
| CER | 29.08 |
We provide a Colab notebook to run a pre-trained Pruned Transducer Stateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
### TIMIT
We provide two models for this recipe: [TDNN LSTM CTC model][TIMIT_tdnn_lstm_ctc]
@ -187,7 +250,8 @@ The PER for this model is:
|--|--|
|PER| 17.66% |
We provide a Colab notebook to run a pre-trained TDNN LiGRU CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/11IT-k4HQIgQngXz1uvWsEYktjqQt7Tmb?usp=sharing)
We provide a Colab notebook to run a pre-trained TDNN LiGRU CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
### TED-LIUM3
@ -215,24 +279,6 @@ The best WER using modified beam search with beam size 4 is:
We provide a Colab notebook to run a pre-trained Pruned Transducer Stateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1je_1zGrOkGVVd4WLzgkXRHxl-I27yWtz?usp=sharing)
### GigaSpeech
We provide two models for this recipe: [Conformer CTC model][GigaSpeech_conformer_ctc]
and [Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss][GigaSpeech_pruned_transducer_stateless2].
#### Conformer CTC
| | Dev | Test |
|-----|-------|-------|
| WER | 10.47 | 10.58 |
#### Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss
| | Dev | Test |
|----------------------|-------|-------|
| greedy search | 10.51 | 10.73 |
| fast beam search | 10.50 | 10.69 |
| modified beam search | 10.40 | 10.51 |
### Aidatatang_200zh
@ -248,6 +294,7 @@ We provide one model for this recipe: [Pruned stateless RNN-T: Conformer encoder
We provide a Colab notebook to run a pre-trained Pruned Transducer Stateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1wNSnSj3T5oOctbh5IGCa393gKOoQw2GH?usp=sharing)
### WenetSpeech
We provide some models for this recipe: [Pruned stateless RNN-T_2: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss][WenetSpeech_pruned_transducer_stateless2] and [Pruned stateless RNN-T_5: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss][WenetSpeech_pruned_transducer_stateless5].
@ -284,20 +331,6 @@ We provide one model for this recipe: [Pruned stateless RNN-T: Conformer encoder
We provide a Colab notebook to run a pre-trained Pruned Transducer Stateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tKr3f0mL17uO_ljdHGKtR7HOmthYHwJG?usp=sharing)
### Aishell4
We provide one model for this recipe: [Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss][Aishell4_pruned_transducer_stateless5].
#### Pruned stateless RNN-T: Conformer encoder + Embedding decoder + k2 pruned RNN-T loss (trained with all subsets)
The best CER(%) results:
| | test |
|----------------------|--------|
| greedy search | 29.89 |
| fast beam search | 28.91 |
| modified beam search | 29.08 |
We provide a Colab notebook to run a pre-trained Pruned Transducer Stateless model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
### TAL_CSASR
@ -331,8 +364,12 @@ Please see: [![Open In Colab](https://colab.research.google.com/assets/colab-bad
[LibriSpeech_conformer_ctc]: egs/librispeech/ASR/conformer_ctc
[LibriSpeech_transducer]: egs/librispeech/ASR/transducer
[LibriSpeech_transducer_stateless]: egs/librispeech/ASR/transducer_stateless
[LibriSpeech_zipformer]: egs/librispeech/ASR/zipformer
[Aishell_tdnn_lstm_ctc]: egs/aishell/ASR/tdnn_lstm_ctc
[Aishell_conformer_ctc]: egs/aishell/ASR/conformer_ctc
[Aishell_pruned_transducer_stateless7]: egs/aishell/ASR/pruned_transducer_stateless7_bbpe
[Aishell2_pruned_transducer_stateless5]: egs/aishell2/ASR/pruned_transducer_stateless5
[Aishell4_pruned_transducer_stateless5]: egs/aishell4/ASR/pruned_transducer_stateless5
[TIMIT_tdnn_lstm_ctc]: egs/timit/ASR/tdnn_lstm_ctc
[TIMIT_tdnn_ligru_ctc]: egs/timit/ASR/tdnn_ligru_ctc
[TED-LIUM3_transducer_stateless]: egs/tedlium3/ASR/transducer_stateless
@ -343,17 +380,17 @@ Please see: [![Open In Colab](https://colab.research.google.com/assets/colab-bad
[WenetSpeech_pruned_transducer_stateless2]: egs/wenetspeech/ASR/pruned_transducer_stateless2
[WenetSpeech_pruned_transducer_stateless5]: egs/wenetspeech/ASR/pruned_transducer_stateless5
[Alimeeting_pruned_transducer_stateless2]: egs/alimeeting/ASR/pruned_transducer_stateless2
[Aishell4_pruned_transducer_stateless5]: egs/aishell4/ASR/pruned_transducer_stateless5
[TAL_CSASR_pruned_transducer_stateless5]: egs/tal_csasr/ASR/pruned_transducer_stateless5
[yesno]: egs/yesno/ASR
[librispeech]: egs/librispeech/ASR
[aishell]: egs/aishell/ASR
[aishell2]: egs/aishell2/ASR
[aishell4]: egs/aishell4/ASR
[timit]: egs/timit/ASR
[tedlium3]: egs/tedlium3/ASR
[gigaspeech]: egs/gigaspeech/ASR
[aidatatang_200zh]: egs/aidatatang_200zh/ASR
[wenetspeech]: egs/wenetspeech/ASR
[alimeeting]: egs/alimeeting/ASR
[aishell4]: egs/aishell4/ASR
[tal_csasr]: egs/tal_csasr/ASR
[k2]: https://github.com/k2-fsa/k2

View File

@ -78,3 +78,15 @@ html_context = {
}
todo_include_todos = True
rst_epilog = """
.. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn
.. _sherpa-onnx: https://github.com/k2-fsa/sherpa-onnx
.. _icefall: https://github.com/k2-fsa/icefall
.. _git-lfs: https://git-lfs.com/
.. _ncnn: https://github.com/tencent/ncnn
.. _LibriSpeech: https://www.openslr.org/12
.. _musan: http://www.openslr.org/17/
.. _ONNX: https://github.com/onnx/onnx
.. _onnxruntime: https://github.com/microsoft/onnxruntime
"""

107
docs/source/faqs.rst Normal file
View File

@ -0,0 +1,107 @@
Frequently Asked Questions (FAQs)
=================================
In this section, we collect issues reported by users and post the corresponding
solutions.
OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
-----------------------------------------------------------------------------------
One user used the following command to install ``torch`` and ``torchaudio``:
.. code-block:: bash
pip install \
torch==1.10.0+cu111 \
torchvision==0.11.0+cu111 \
torchaudio==0.10.0 \
-f https://download.pytorch.org/whl/torch_stable.html
and it throws the following error when running ``tdnn/train.py``:
.. code-block::
OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
The fix is to specify the CUDA version while installing ``torchaudio``. That
is, change ``torchaudio==0.10.0`` to ``torchaudio==0.10.0+cu111``. Therefore,
the correct command is:
.. code-block:: bash
pip install \
torch==1.10.0+cu111 \
torchvision==0.11.0+cu111 \
torchaudio==0.10.0+cu111 \
-f https://download.pytorch.org/whl/torch_stable.html
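After reinstalling, you can check that the CUDA builds were actually picked up; both version strings should end with ``+cu111``:
.. code-block:: bash
# both should print a version ending in +cu111
python3 -c "import torch; print(torch.__version__)"
python3 -c "import torchaudio; print(torchaudio.__version__)"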
AttributeError: module 'distutils' has no attribute 'version'
-------------------------------------------------------------
The error log is:
.. code-block::
Traceback (most recent call last):
File "./tdnn/train.py", line 14, in <module>
from asr_datamodule import YesNoAsrDataModule
File "/home/xxx/code/next-gen-kaldi/icefall/egs/yesno/ASR/tdnn/asr_datamodule.py", line 34, in <module>
from icefall.dataset.datamodule import DataModule
File "/home/xxx/code/next-gen-kaldi/icefall/icefall/__init__.py", line 3, in <module>
from . import (
File "/home/xxx/code/next-gen-kaldi/icefall/icefall/decode.py", line 23, in <module>
from icefall.utils import add_eos, add_sos, get_texts
File "/home/xxx/code/next-gen-kaldi/icefall/icefall/utils.py", line 39, in <module>
from torch.utils.tensorboard import SummaryWriter
File "/home/xxx/tool/miniconda3/envs/yyy/lib/python3.8/site-packages/torch/utils/tensorboard/__init__.py", line 4, in <module>
LooseVersion = distutils.version.LooseVersion
AttributeError: module 'distutils' has no attribute 'version'
The fix is:
.. code-block:: bash
pip uninstall setuptools
pip install setuptools==58.0.4
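Before retrying ``tdnn/train.py``, you can confirm that the downgrade took effect:
.. code-block:: bash
# should print 58.0.4
python3 -c "import setuptools; print(setuptools.__version__)"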
ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
--------------------------------------------------------------------------------------------
If you are using ``conda`` and encounter the following issue:
.. code-block::
Traceback (most recent call last):
File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 24, in <module>
from _k2 import DeterminizeWeightPushingType
ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/k2-dev/yangyifan/icefall/egs/librispeech/ASR/./pruned_transducer_stateless7_ctc_bs/decode.py", line 104, in <module>
import k2
File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 30, in <module>
raise ImportError(
ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
Note: If you're using anaconda and importing k2 on MacOS,
you can probably fix this by setting the environment variable:
export DYLD_LIBRARY_PATH=$CONDA_PREFIX/lib/python3.10/site-packages:$DYLD_LIBRARY_PATH
Please first try to find where ``libpython3.10.so.1.0`` is located.
For instance,
.. code-block:: bash
cd $CONDA_PREFIX/lib
find . -name "libpython*"
If you are able to find it inside ``$CONDA_PREFIX/lib``, please set the
following environment variable:
.. code-block:: bash
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
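With the variable set, importing ``k2`` in the same shell should no longer fail; a quick check:
.. code-block:: bash
# no ImportError means the shared library is now found
python3 -c "import k2; print(k2.__file__)"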

View File

@ -21,6 +21,7 @@ speech recognition recipes using `k2 <https://github.com/k2-fsa/k2>`_.
:caption: Contents:
installation/index
faqs
model-export/index
.. toctree::

View File

@ -3,64 +3,91 @@
Installation
============
- |os|
- |device|
- |python_versions|
- |torch_versions|
- |k2_versions|
.. |os| image:: ./images/os-Linux_macOS-ff69b4.svg
:alt: Supported operating systems
.. |device| image:: ./images/device-CPU_CUDA-orange.svg
:alt: Supported devices
.. |python_versions| image:: ./images/python-gt-v3.6-blue.svg
:alt: Supported python versions
.. |torch_versions| image:: ./images/torch-gt-v1.6.0-green.svg
:alt: Supported PyTorch versions
.. |k2_versions| image:: ./images/k2-gt-v1.9-blueviolet.svg
:alt: Supported k2 versions
``icefall`` depends on `k2 <https://github.com/k2-fsa/k2>`_ and
`lhotse <https://github.com/lhotse-speech/lhotse>`_.
We recommend you to use the following steps to install the dependencies.
We recommend that you use the following steps to install the dependencies.
- (0) Install PyTorch and torchaudio
- (1) Install k2
- (2) Install lhotse
- (0) Install CUDA toolkit and cuDNN
- (1) Install PyTorch and torchaudio
- (2) Install k2
- (3) Install lhotse
.. caution::
99% of users who have issues with the installation are using conda.
.. caution::
99% of users who have issues with the installation are using conda.
.. caution::
99% of users who have issues with the installation are using conda.
.. hint::
We suggest that you use ``pip install`` to install PyTorch.
You can use the following command to create a virtual environment in Python:
.. code-block:: bash
python3 -m venv ./my_env
source ./my_env/bin/activate
.. caution::
Installation order matters.
(0) Install PyTorch and torchaudio
(0) Install CUDA toolkit and cuDNN
----------------------------------
Please refer to
`<https://k2-fsa.github.io/k2/installation/cuda-cudnn.html>`_
to install CUDA and cuDNN.
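Once they are installed, a quick sanity check (assuming ``nvcc`` is on your ``PATH``) is:
.. code-block:: bash
# prints the CUDA compiler version, e.g. "release 11.1"
nvcc --version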
(1) Install PyTorch and torchaudio
----------------------------------
Please refer to `<https://pytorch.org/>`_ to install PyTorch
and torchaudio.
.. hint::
(1) Install k2
You can also go to `<https://download.pytorch.org/whl/torch_stable.html>`_
to download pre-compiled wheels and install them.
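For example, a CUDA 11.1 setup using the versions from the FAQs section would be installed like this (a sketch; adjust the versions to match your CUDA toolkit):
.. code-block:: bash
# install torch and torchaudio together, with matching +cu111 builds
pip install \
torch==1.10.0+cu111 \
torchaudio==0.10.0+cu111 \
-f https://download.pytorch.org/whl/torch_stable.html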
.. caution::
Please install torch and torchaudio at the same time.
(2) Install k2
--------------
Please refer to `<https://k2-fsa.github.io/k2/installation/index.html>`_
to install ``k2``.
.. CAUTION::
.. caution::
You need to install ``k2`` with a version at least **v1.9**.
Please don't change your installed PyTorch after you have installed k2.
.. HINT::
.. note::
If you have already installed PyTorch and don't want to replace it,
please install a version of ``k2`` that is compiled against the version
of PyTorch you are using.
We suggest that you install k2 from source by following
`<https://k2-fsa.github.io/k2/installation/from_source.html>`_
or
`<https://k2-fsa.github.io/k2/installation/for_developers.html>`_.
(2) Install lhotse
.. hint::
Please always install the latest version of k2.
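To see which k2 you ended up with, and the PyTorch/CUDA it was built against, you can use k2's version helper:
.. code-block:: bash
python3 -m k2.version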
(3) Install lhotse
------------------
Please refer to `<https://lhotse.readthedocs.io/en/latest/getting-started.html#installation>`_
@ -75,8 +102,7 @@ to install ``lhotse``.
to install the latest version of lhotse.
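For reference, the CI workflows in this repository install the latest lhotse with:
.. code-block:: bash
pip install git+https://github.com/lhotse-speech/lhotse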
(3) Download icefall
(4) Download icefall
--------------------
``icefall`` is a collection of Python scripts; all you need to do is download it
@ -338,44 +364,42 @@ The log of running ``./prepare.sh`` is:
.. code-block::
2021-08-23 19:27:26 (prepare.sh:24:main) dl_dir: /tmp/icefall/egs/yesno/ASR/download
2021-08-23 19:27:26 (prepare.sh:27:main) stage 0: Download data
Downloading waves_yesno.tar.gz: 4.49MB [00:03, 1.39MB/s]
2021-08-23 19:27:30 (prepare.sh:36:main) Stage 1: Prepare yesno manifest
2021-08-23 19:27:31 (prepare.sh:42:main) Stage 2: Compute fbank for yesno
2021-08-23 19:27:32,803 INFO [compute_fbank_yesno.py:52] Processing train
Extracting and storing features: 100%|_______________________________________________________________| 90/90 [00:01<00:00, 80.57it/s]
2021-08-23 19:27:34,085 INFO [compute_fbank_yesno.py:52] Processing test
Extracting and storing features: 100%|______________________________________________________________| 30/30 [00:00<00:00, 248.21it/s]
2021-08-23 19:27:34 (prepare.sh:48:main) Stage 3: Prepare lang
2021-08-23 19:27:35 (prepare.sh:63:main) Stage 4: Prepare G
/tmp/pip-install-fcordre9/kaldilm_6899d26f2d684ad48f21025950cd2866/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Rea
d(std::istream&):79
[I] Reading \data\ section.
/tmp/pip-install-fcordre9/kaldilm_6899d26f2d684ad48f21025950cd2866/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Rea
d(std::istream&):140
[I] Reading \1-grams: section.
2021-08-23 19:27:35 (prepare.sh:89:main) Stage 5: Compile HLG
2021-08-23 19:27:35,928 INFO [compile_hlg.py:120] Processing data/lang_phone
2021-08-23 19:27:35,929 INFO [lexicon.py:116] Converting L.pt to Linv.pt
2021-08-23 19:27:35,931 INFO [compile_hlg.py:48] Building ctc_topo. max_token_id: 3
2021-08-23 19:27:35,932 INFO [compile_hlg.py:52] Loading G.fst.txt
2021-08-23 19:27:35,932 INFO [compile_hlg.py:62] Intersecting L and G
2021-08-23 19:27:35,933 INFO [compile_hlg.py:64] LG shape: (4, None)
2021-08-23 19:27:35,933 INFO [compile_hlg.py:66] Connecting LG
2021-08-23 19:27:35,933 INFO [compile_hlg.py:68] LG shape after k2.connect: (4, None)
2021-08-23 19:27:35,933 INFO [compile_hlg.py:70] <class 'torch.Tensor'>
2021-08-23 19:27:35,933 INFO [compile_hlg.py:71] Determinizing LG
2021-08-23 19:27:35,934 INFO [compile_hlg.py:74] <class '_k2.RaggedInt'>
2021-08-23 19:27:35,934 INFO [compile_hlg.py:76] Connecting LG after k2.determinize
2021-08-23 19:27:35,934 INFO [compile_hlg.py:79] Removing disambiguation symbols on LG
2021-08-23 19:27:35,934 INFO [compile_hlg.py:87] LG shape after k2.remove_epsilon: (6, None)
2021-08-23 19:27:35,935 INFO [compile_hlg.py:92] Arc sorting LG
2021-08-23 19:27:35,935 INFO [compile_hlg.py:95] Composing H and LG
2021-08-23 19:27:35,935 INFO [compile_hlg.py:102] Connecting LG
2021-08-23 19:27:35,935 INFO [compile_hlg.py:105] Arc sorting LG
2021-08-23 19:27:35,936 INFO [compile_hlg.py:107] HLG.shape: (8, None)
2021-08-23 19:27:35,936 INFO [compile_hlg.py:123] Saving HLG.pt to data/lang_phone
2023-05-12 17:55:21 (prepare.sh:27:main) dl_dir: /tmp/icefall/egs/yesno/ASR/download
2023-05-12 17:55:21 (prepare.sh:30:main) Stage 0: Download data
/tmp/icefall/egs/yesno/ASR/download/waves_yesno.tar.gz: 100%|_______________________________________________________________| 4.70M/4.70M [06:54<00:00, 11.4kB/s]
2023-05-12 18:02:19 (prepare.sh:39:main) Stage 1: Prepare yesno manifest
2023-05-12 18:02:21 (prepare.sh:45:main) Stage 2: Compute fbank for yesno
2023-05-12 18:02:23,199 INFO [compute_fbank_yesno.py:65] Processing train
Extracting and storing features: 100%|_______________________________________________________________| 90/90 [00:00<00:00, 212.60it/s]
2023-05-12 18:02:23,640 INFO [compute_fbank_yesno.py:65] Processing test
Extracting and storing features: 100%|_______________________________________________________________| 30/30 [00:00<00:00, 304.53it/s]
2023-05-12 18:02:24 (prepare.sh:51:main) Stage 3: Prepare lang
2023-05-12 18:02:26 (prepare.sh:66:main) Stage 4: Prepare G
/project/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Read(std::istream&):79
[I] Reading \data\ section.
/project/kaldilm/csrc/arpa_file_parser.cc:void kaldilm::ArpaFileParser::Read(std::istream&):140
[I] Reading \1-grams: section.
2023-05-12 18:02:26 (prepare.sh:92:main) Stage 5: Compile HLG
2023-05-12 18:02:28,581 INFO [compile_hlg.py:124] Processing data/lang_phone
2023-05-12 18:02:28,582 INFO [lexicon.py:171] Converting L.pt to Linv.pt
2023-05-12 18:02:28,609 INFO [compile_hlg.py:48] Building ctc_topo. max_token_id: 3
2023-05-12 18:02:28,610 INFO [compile_hlg.py:52] Loading G.fst.txt
2023-05-12 18:02:28,611 INFO [compile_hlg.py:62] Intersecting L and G
2023-05-12 18:02:28,613 INFO [compile_hlg.py:64] LG shape: (4, None)
2023-05-12 18:02:28,613 INFO [compile_hlg.py:66] Connecting LG
2023-05-12 18:02:28,614 INFO [compile_hlg.py:68] LG shape after k2.connect: (4, None)
2023-05-12 18:02:28,614 INFO [compile_hlg.py:70] <class 'torch.Tensor'>
2023-05-12 18:02:28,614 INFO [compile_hlg.py:71] Determinizing LG
2023-05-12 18:02:28,615 INFO [compile_hlg.py:74] <class '_k2.ragged.RaggedTensor'>
2023-05-12 18:02:28,615 INFO [compile_hlg.py:76] Connecting LG after k2.determinize
2023-05-12 18:02:28,615 INFO [compile_hlg.py:79] Removing disambiguation symbols on LG
2023-05-12 18:02:28,616 INFO [compile_hlg.py:91] LG shape after k2.remove_epsilon: (6, None)
2023-05-12 18:02:28,617 INFO [compile_hlg.py:96] Arc sorting LG
2023-05-12 18:02:28,617 INFO [compile_hlg.py:99] Composing H and LG
2023-05-12 18:02:28,619 INFO [compile_hlg.py:106] Connecting LG
2023-05-12 18:02:28,619 INFO [compile_hlg.py:109] Arc sorting LG
2023-05-12 18:02:28,619 INFO [compile_hlg.py:111] HLG.shape: (8, None)
2023-05-12 18:02:28,619 INFO [compile_hlg.py:127] Saving HLG.pt to data/lang_phone
Training
@ -408,49 +432,53 @@ The training log is given below:
.. code-block::
2021-08-23 19:30:31,072 INFO [train.py:465] Training started
2021-08-23 19:30:31,072 INFO [train.py:466] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01,
'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0, 'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, '
best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'valid_interval': 10, 'beam_size': 10, 'reduction': 'sum', 'use_doub
le_scores': True, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 15, 'feature_dir': PosixPath('data/fbank'
), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0
, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
2021-08-23 19:30:31,074 INFO [lexicon.py:113] Loading pre-compiled data/lang_phone/Linv.pt
2021-08-23 19:30:31,098 INFO [asr_datamodule.py:146] About to get train cuts
2021-08-23 19:30:31,098 INFO [asr_datamodule.py:240] About to get train cuts
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:149] About to create train dataset
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:200] Using SingleCutSampler.
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:206] About to create train dataloader
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:219] About to get test cuts
2021-08-23 19:30:31,102 INFO [asr_datamodule.py:246] About to get test cuts
2021-08-23 19:30:31,357 INFO [train.py:416] Epoch 0, batch 0, batch avg loss 1.0789, total avg loss: 1.0789, batch size: 4
2021-08-23 19:30:31,848 INFO [train.py:416] Epoch 0, batch 10, batch avg loss 0.5356, total avg loss: 0.7556, batch size: 4
2021-08-23 19:30:32,301 INFO [train.py:432] Epoch 0, valid loss 0.9972, best valid loss: 0.9972 best valid epoch: 0
2021-08-23 19:30:32,805 INFO [train.py:416] Epoch 0, batch 20, batch avg loss 0.2436, total avg loss: 0.5717, batch size: 3
2021-08-23 19:30:33,109 INFO [train.py:432] Epoch 0, valid loss 0.4167, best valid loss: 0.4167 best valid epoch: 0
2021-08-23 19:30:33,121 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-0.pt
2021-08-23 19:30:33,325 INFO [train.py:416] Epoch 1, batch 0, batch avg loss 0.2214, total avg loss: 0.2214, batch size: 5
2021-08-23 19:30:33,798 INFO [train.py:416] Epoch 1, batch 10, batch avg loss 0.0781, total avg loss: 0.1343, batch size: 5
2021-08-23 19:30:34,065 INFO [train.py:432] Epoch 1, valid loss 0.0859, best valid loss: 0.0859 best valid epoch: 1
2021-08-23 19:30:34,556 INFO [train.py:416] Epoch 1, batch 20, batch avg loss 0.0421, total avg loss: 0.0975, batch size: 3
2021-08-23 19:30:34,810 INFO [train.py:432] Epoch 1, valid loss 0.0431, best valid loss: 0.0431 best valid epoch: 1
2021-08-23 19:30:34,824 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-1.pt
2023-05-12 18:04:59,759 INFO [train.py:481] Training started
2023-05-12 18:04:59,759 INFO [train.py:482] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lr': 0.01, 'feature_dim': 23, 'weight_decay': 1e-06, 'start_epoch': 0,
'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 10, 'reset_interval': 20, 'valid_interval': 10, 'beam_size': 10,
'reduction': 'sum', 'use_double_scores': True, 'world_size': 1, 'master_port': 12354, 'tensorboard': True, 'num_epochs': 15, 'seed': 42, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0,
'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True, 'num_workers': 2,
'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '3b7f09fa35e72589914f67089c0da9f196a92ca4', 'k2-git-date': 'Mon May 8 22:58:45 2023',
'lhotse-version': '1.15.0.dev+git.6fcfced.clean', 'torch-version': '2.0.0+cu118', 'torch-cuda-available': False, 'torch-cuda-version': '11.8', 'python-version': '3.1', 'icefall-git-branch': 'master',
'icefall-git-sha1': '30bde4b-clean', 'icefall-git-date': 'Thu May 11 17:37:47 2023', 'icefall-path': '/tmp/icefall',
'k2-path': 'tmp/lib/python3.10/site-packages/k2-1.24.3.dev20230512+cuda11.8.torch2.0.0-py3.10-linux-x86_64.egg/k2/__init__.py',
'lhotse-path': 'tmp/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'host', 'IP address': '0.0.0.0'}}
2023-05-12 18:04:59,761 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt
2023-05-12 18:04:59,764 INFO [train.py:495] device: cpu
2023-05-12 18:04:59,791 INFO [asr_datamodule.py:146] About to get train cuts
2023-05-12 18:04:59,791 INFO [asr_datamodule.py:244] About to get train cuts
2023-05-12 18:04:59,852 INFO [asr_datamodule.py:149] About to create train dataset
2023-05-12 18:04:59,852 INFO [asr_datamodule.py:199] Using SingleCutSampler.
2023-05-12 18:04:59,852 INFO [asr_datamodule.py:205] About to create train dataloader
2023-05-12 18:04:59,853 INFO [asr_datamodule.py:218] About to get test cuts
2023-05-12 18:04:59,853 INFO [asr_datamodule.py:252] About to get test cuts
2023-05-12 18:04:59,986 INFO [train.py:422] Epoch 0, batch 0, loss[loss=1.065, over 2436.00 frames. ], tot_loss[loss=1.065, over 2436.00 frames. ], batch size: 4
2023-05-12 18:05:00,352 INFO [train.py:422] Epoch 0, batch 10, loss[loss=0.4561, over 2828.00 frames. ], tot_loss[loss=0.7076, over 22192.90 frames. ], batch size: 4
2023-05-12 18:05:00,691 INFO [train.py:444] Epoch 0, validation loss=0.9002, over 18067.00 frames.
2023-05-12 18:05:00,996 INFO [train.py:422] Epoch 0, batch 20, loss[loss=0.2555, over 2695.00 frames. ], tot_loss[loss=0.484, over 34971.47 frames. ], batch size: 5
2023-05-12 18:05:01,217 INFO [train.py:444] Epoch 0, validation loss=0.4688, over 18067.00 frames.
2023-05-12 18:05:01,251 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-0.pt
2023-05-12 18:05:01,389 INFO [train.py:422] Epoch 1, batch 0, loss[loss=0.2532, over 2436.00 frames. ], tot_loss[loss=0.2532, over 2436.00 frames. ], batch size: 4
2023-05-12 18:05:01,637 INFO [train.py:422] Epoch 1, batch 10, loss[loss=0.1139, over 2828.00 frames. ], tot_loss[loss=0.1592, over 22192.90 frames. ], batch size: 4
2023-05-12 18:05:01,859 INFO [train.py:444] Epoch 1, validation loss=0.1629, over 18067.00 frames.
2023-05-12 18:05:02,094 INFO [train.py:422] Epoch 1, batch 20, loss[loss=0.0767, over 2695.00 frames. ], tot_loss[loss=0.118, over 34971.47 frames. ], batch size: 5
2023-05-12 18:05:02,350 INFO [train.py:444] Epoch 1, validation loss=0.06778, over 18067.00 frames.
2023-05-12 18:05:02,395 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-1.pt
... ...
2021-08-23 19:30:49,657 INFO [train.py:416] Epoch 13, batch 0, batch avg loss 0.0109, total avg loss: 0.0109, batch size: 5
2021-08-23 19:30:49,984 INFO [train.py:416] Epoch 13, batch 10, batch avg loss 0.0093, total avg loss: 0.0096, batch size: 4
2021-08-23 19:30:50,239 INFO [train.py:432] Epoch 13, valid loss 0.0104, best valid loss: 0.0101 best valid epoch: 12
2021-08-23 19:30:50,569 INFO [train.py:416] Epoch 13, batch 20, batch avg loss 0.0092, total avg loss: 0.0096, batch size: 2
2021-08-23 19:30:50,819 INFO [train.py:432] Epoch 13, valid loss 0.0101, best valid loss: 0.0101 best valid epoch: 13
2021-08-23 19:30:50,835 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-13.pt
2021-08-23 19:30:51,024 INFO [train.py:416] Epoch 14, batch 0, batch avg loss 0.0105, total avg loss: 0.0105, batch size: 5
2021-08-23 19:30:51,317 INFO [train.py:416] Epoch 14, batch 10, batch avg loss 0.0099, total avg loss: 0.0097, batch size: 4
2021-08-23 19:30:51,552 INFO [train.py:432] Epoch 14, valid loss 0.0108, best valid loss: 0.0101 best valid epoch: 13
2021-08-23 19:30:51,869 INFO [train.py:416] Epoch 14, batch 20, batch avg loss 0.0096, total avg loss: 0.0097, batch size: 5
2021-08-23 19:30:52,107 INFO [train.py:432] Epoch 14, valid loss 0.0102, best valid loss: 0.0101 best valid epoch: 13
2021-08-23 19:30:52,126 INFO [checkpoint.py:62] Saving checkpoint to tdnn/exp/epoch-14.pt
2021-08-23 19:30:52,128 INFO [train.py:537] Done!
2023-05-12 18:05:14,789 INFO [train.py:422] Epoch 13, batch 0, loss[loss=0.01056, over 2436.00 frames. ], tot_loss[loss=0.01056, over 2436.00 frames. ], batch size: 4
2023-05-12 18:05:15,016 INFO [train.py:422] Epoch 13, batch 10, loss[loss=0.009022, over 2828.00 frames. ], tot_loss[loss=0.009985, over 22192.90 frames. ], batch size: 4
2023-05-12 18:05:15,271 INFO [train.py:444] Epoch 13, validation loss=0.01088, over 18067.00 frames.
2023-05-12 18:05:15,497 INFO [train.py:422] Epoch 13, batch 20, loss[loss=0.01174, over 2695.00 frames. ], tot_loss[loss=0.01077, over 34971.47 frames. ], batch size: 5
2023-05-12 18:05:15,747 INFO [train.py:444] Epoch 13, validation loss=0.01087, over 18067.00 frames.
2023-05-12 18:05:15,783 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-13.pt
2023-05-12 18:05:15,921 INFO [train.py:422] Epoch 14, batch 0, loss[loss=0.01045, over 2436.00 frames. ], tot_loss[loss=0.01045, over 2436.00 frames. ], batch size: 4
2023-05-12 18:05:16,146 INFO [train.py:422] Epoch 14, batch 10, loss[loss=0.008957, over 2828.00 frames. ], tot_loss[loss=0.009903, over 22192.90 frames. ], batch size: 4
2023-05-12 18:05:16,374 INFO [train.py:444] Epoch 14, validation loss=0.01092, over 18067.00 frames.
2023-05-12 18:05:16,598 INFO [train.py:422] Epoch 14, batch 20, loss[loss=0.01169, over 2695.00 frames. ], tot_loss[loss=0.01065, over 34971.47 frames. ], batch size: 5
2023-05-12 18:05:16,824 INFO [train.py:444] Epoch 14, validation loss=0.01077, over 18067.00 frames.
2023-05-12 18:05:16,862 INFO [checkpoint.py:75] Saving checkpoint to tdnn/exp/epoch-14.pt
2023-05-12 18:05:16,865 INFO [train.py:555] Done!
Decoding
~~~~~~~~
@ -465,22 +493,25 @@ The decoding log is:
.. code-block::
2021-08-23 19:35:30,192 INFO [decode.py:249] Decoding started
2021-08-23 19:35:30,192 INFO [decode.py:250] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23, 'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'feature_dir': PosixPath('data/fbank'), 'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2}
2021-08-23 19:35:30,193 INFO [lexicon.py:113] Loading pre-compiled data/lang_phone/Linv.pt
2021-08-23 19:35:30,213 INFO [decode.py:259] device: cpu
2021-08-23 19:35:30,217 INFO [decode.py:279] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt']
/tmp/icefall/icefall/checkpoint.py:146: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch.
It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at /pytorch/aten/src/ATen/native/BinaryOps.cpp:450.)
avg[k] //= n
2021-08-23 19:35:30,220 INFO [asr_datamodule.py:219] About to get test cuts
2021-08-23 19:35:30,220 INFO [asr_datamodule.py:246] About to get test cuts
2021-08-23 19:35:30,409 INFO [decode.py:190] batch 0/8, cuts processed until now is 4
2021-08-23 19:35:30,571 INFO [decode.py:228] The transcripts are stored in tdnn/exp/recogs-test_set.txt
2021-08-23 19:35:30,572 INFO [utils.py:317] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
2021-08-23 19:35:30,573 INFO [decode.py:236] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
2021-08-23 19:35:30,573 INFO [decode.py:299] Done!
2023-05-12 18:08:30,482 INFO [decode.py:263] Decoding started
2023-05-12 18:08:30,483 INFO [decode.py:264] {'exp_dir': PosixPath('tdnn/exp'), 'lang_dir': PosixPath('data/lang_phone'), 'lm_dir': PosixPath('data/lm'), 'feature_dim': 23,
'search_beam': 20, 'output_beam': 8, 'min_active_states': 30, 'max_active_states': 10000, 'use_double_scores': True, 'epoch': 14, 'avg': 2, 'export': False, 'feature_dir': PosixPath('data/fbank'),
'max_duration': 30.0, 'bucketing_sampler': False, 'num_buckets': 10, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': False, 'return_cuts': True,
'num_workers': 2, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '3b7f09fa35e72589914f67089c0da9f196a92ca4', 'k2-git-date': 'Mon May 8 22:58:45 2023',
'lhotse-version': '1.15.0.dev+git.6fcfced.clean', 'torch-version': '2.0.0+cu118', 'torch-cuda-available': False, 'torch-cuda-version': '11.8', 'python-version': '3.1', 'icefall-git-branch': 'master',
'icefall-git-sha1': '30bde4b-clean', 'icefall-git-date': 'Thu May 11 17:37:47 2023', 'icefall-path': '/tmp/icefall',
'k2-path': '/tmp/lib/python3.10/site-packages/k2-1.24.3.dev20230512+cuda11.8.torch2.0.0-py3.10-linux-x86_64.egg/k2/__init__.py',
'lhotse-path': '/tmp/lib/python3.10/site-packages/lhotse/__init__.py', 'hostname': 'host', 'IP address': '0.0.0.0'}}
2023-05-12 18:08:30,483 INFO [lexicon.py:168] Loading pre-compiled data/lang_phone/Linv.pt
2023-05-12 18:08:30,487 INFO [decode.py:273] device: cpu
2023-05-12 18:08:30,513 INFO [decode.py:291] averaging ['tdnn/exp/epoch-13.pt', 'tdnn/exp/epoch-14.pt']
2023-05-12 18:08:30,521 INFO [asr_datamodule.py:218] About to get test cuts
2023-05-12 18:08:30,521 INFO [asr_datamodule.py:252] About to get test cuts
2023-05-12 18:08:30,675 INFO [decode.py:204] batch 0/?, cuts processed until now is 4
2023-05-12 18:08:30,923 INFO [decode.py:241] The transcripts are stored in tdnn/exp/recogs-test_set.txt
2023-05-12 18:08:30,924 INFO [utils.py:558] [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
2023-05-12 18:08:30,925 INFO [decode.py:249] Wrote detailed error stats to tdnn/exp/errs-test_set.txt
2023-05-12 18:08:30,925 INFO [decode.py:316] Done!
**Congratulations!** You have successfully set up the environment and have run the first recipe in ``icefall``.

View File

@ -0,0 +1,21 @@
2023-01-11 12:15:38,677 INFO [export-for-ncnn.py:220] device: cpu
2023-01-11 12:15:38,681 INFO [export-for-ncnn.py:229] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1,
'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80,
'subsampling_factor': 4, 'decoder_dim': 512, 'joiner_dim': 512, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.2',
'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'a34171ed85605b0926eebbd0463d059431f4f74a', 'k2-git-date': 'Wed Dec 14 00:06:38 2022',
'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False,
'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'fix-stateless3-train-2022-12-27', 'icefall-git-sha1': '530e8a1-dirty',
'icefall-git-date': 'Tue Dec 27 13:59:18 2022', 'icefall-path': '/star-fj/fangjun/open-source/icefall',
'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py',
'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '127.0.0.1'}, 'epoch': 30, 'iter': 0, 'avg': 1,
'exp_dir': PosixPath('icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp'),
'bpe_model': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05//data/lang_bpe_500/bpe.model', 'jit': False,
'context_size': 2, 'use_averaged_model': False, 'encoder_dim': 512, 'nhead': 8, 'dim_feedforward': 2048, 'num_encoder_layers': 12,
'cnn_module_kernel': 31, 'left_context_length': 32, 'chunk_length': 32, 'right_context_length': 8, 'memory_size': 32, 'blank_id': 0, 'vocab_size': 500}
2023-01-11 12:15:38,681 INFO [export-for-ncnn.py:231] About to create model
2023-01-11 12:15:40,053 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/epoch-30.pt
2023-01-11 12:15:40,708 INFO [export-for-ncnn.py:315] Number of model parameters: 75490012
2023-01-11 12:15:41,681 INFO [export-for-ncnn.py:318] Using torch.jit.trace()
2023-01-11 12:15:41,681 INFO [export-for-ncnn.py:320] Exporting encoder
2023-01-11 12:15:41,682 INFO [export-for-ncnn.py:149] chunk_length: 32, right_context_length: 8

View File

@ -0,0 +1,18 @@
2023-02-17 11:22:42,862 INFO [export-for-ncnn.py:222] device: cpu
2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:231] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'dim_feedforward': 2048, 'decoder_dim': 512, 'joiner_dim': 512, 'is_pnnx': False, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-dirty', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp'), 'bpe_model': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': 12, 'encoder_dim': 512, 'rnn_hidden_size': 1024, 'aux_layer_period': 0, 'blank_id': 0, 'vocab_size': 500}
2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:235] About to create model
2023-02-17 11:22:43,239 INFO [train.py:472] Disable giga
2023-02-17 11:22:43,249 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/epoch-99.pt
2023-02-17 11:22:44,595 INFO [export-for-ncnn.py:324] encoder parameters: 83137520
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:325] decoder parameters: 257024
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:326] joiner parameters: 781812
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:327] total parameters: 84176356
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:329] Using torch.jit.trace()
2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:331] Exporting encoder
2023-02-17 11:22:48,182 INFO [export-for-ncnn.py:158] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
2023-02-17 11:22:48,183 INFO [export-for-ncnn.py:335] Exporting decoder
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/lstm_transducer_stateless2/decoder.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
need_pad = bool(need_pad)
2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:180] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:339] Exporting joiner
2023-02-17 11:22:48,304 INFO [export-for-ncnn.py:207] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt

View File

@ -0,0 +1,74 @@
2023-02-27 20:23:07,473 INFO [export-for-ncnn.py:246] device: cpu
2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:255] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-clean', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp'), 'bpe_model': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 32, 'blank_id': 0, 'vocab_size': 500}
2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:257] About to create model
2023-02-27 20:23:08,023 INFO [zipformer2.py:419] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
2023-02-27 20:23:08,037 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/epoch-99.pt
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:346] encoder parameters: 68944004
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:347] decoder parameters: 260096
2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:348] joiner parameters: 716276
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:349] total parameters: 69920376
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:351] Using torch.jit.trace()
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:353] Exporting encoder
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:174] decode_chunk_len: 32
2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:175] T: 39
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1344: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_len.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_avg.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1352: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_key.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1356: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_val.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1360: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_val2.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1364: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_conv1.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1368: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_conv2.size(0) == self.num_layers, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1373: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert self.left_context_len == cached_key.shape[1], (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1884: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert self.x_size == x.size(0), (self.x_size, x.size(0))
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2442: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_key.shape[0] == self.left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2449: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_key.shape[0] == cached_val.shape[0], (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2469: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_key.shape[0] == left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2473: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_val.shape[0] == left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2483: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert kv_len == k.shape[0], (kv_len, k.shape)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2570: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert list(attn_output.size()) == [bsz * num_heads, seq_len, head_dim // 2]
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2926: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cache.shape == (x.size(0), x.size(1), self.lorder), (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2652: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert x.shape[0] == self.x_size, (x.shape[0], self.x_size)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2653: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert x.shape[2] == self.embed_dim, (x.shape[2], self.embed_dim)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2666: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert cached_val.shape[0] == self.left_context_len, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1543: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert src.shape[0] == self.in_x_size, (src.shape[0], self.in_x_size)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1637: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert src.shape[0] == self.in_x_size, (
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1643: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert src.shape[2] == self.in_channels, (src.shape[2], self.in_channels)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1571: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if src.shape[0] != self.in_x_size:
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1763: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert src1.shape[:-1] == src2.shape[:-1], (src1.shape, src2.shape)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1779: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert src1.shape[-1] == self.dim1, (src1.shape[-1], self.dim1)
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1780: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert src2.shape[-1] == self.dim2, (src2.shape[-1], self.dim2)
/star-fj/fangjun/py38/lib/python3.8/site-packages/torch/jit/_trace.py:958: TracerWarning: Encountering a list at the output of the tracer might cause the trace to be incorrect, this is only valid if the container structure does not change based on the module's inputs. Consider using a constant container instead (e.g. for `list`, use a `tuple` instead. for `dict`, use a `NamedTuple` instead). If you absolutely need this and know the side effects, pass strict=False to trace() to allow this behavior.
module._c._create_method_from_trace(
2023-02-27 20:23:19,640 INFO [export-for-ncnn.py:182] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
2023-02-27 20:23:19,646 INFO [export-for-ncnn.py:357] Exporting decoder
/star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decoder.py:102: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
assert embedding_out.size(-1) == self.context_size
2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:204] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:361] Exporting joiner
2023-02-27 20:23:19,735 INFO [export-for-ncnn.py:231] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt

View File

@ -0,0 +1,104 @@
Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
num encoder conv layers: 88
num joiner conv layers: 3
num files: 3
Processing ../test_wavs/1089-134686-0001.wav
Processing ../test_wavs/1221-135766-0001.wav
Processing ../test_wavs/1221-135766-0002.wav
Processing ../test_wavs/1089-134686-0001.wav
Processing ../test_wavs/1221-135766-0001.wav
Processing ../test_wavs/1221-135766-0002.wav
----------encoder----------
conv_87 : max = 15.942385 threshold = 15.938493 scale = 7.968131
conv_88 : max = 35.442448 threshold = 15.549335 scale = 8.167552
conv_89 : max = 23.228289 threshold = 8.001738 scale = 15.871552
linear_90 : max = 3.976146 threshold = 1.101789 scale = 115.267128
linear_91 : max = 6.962030 threshold = 5.162033 scale = 24.602713
linear_92 : max = 12.323041 threshold = 3.853959 scale = 32.953129
linear_94 : max = 6.905416 threshold = 4.648006 scale = 27.323545
linear_93 : max = 6.905416 threshold = 5.474093 scale = 23.200188
linear_95 : max = 1.888012 threshold = 1.403563 scale = 90.483986
linear_96 : max = 6.856741 threshold = 5.398679 scale = 23.524273
linear_97 : max = 9.635942 threshold = 2.613655 scale = 48.590950
linear_98 : max = 6.460340 threshold = 5.670146 scale = 22.398010
linear_99 : max = 9.532276 threshold = 2.585537 scale = 49.119396
linear_101 : max = 6.585871 threshold = 5.719224 scale = 22.205809
linear_100 : max = 6.585871 threshold = 5.751382 scale = 22.081648
linear_102 : max = 1.593344 threshold = 1.450581 scale = 87.551147
linear_103 : max = 6.592681 threshold = 5.705824 scale = 22.257959
linear_104 : max = 8.752957 threshold = 1.980955 scale = 64.110489
linear_105 : max = 6.696240 threshold = 5.877193 scale = 21.608953
linear_106 : max = 9.059659 threshold = 2.643138 scale = 48.048950
linear_108 : max = 6.975461 threshold = 4.589567 scale = 27.671457
linear_107 : max = 6.975461 threshold = 6.190381 scale = 20.515701
linear_109 : max = 3.710759 threshold = 2.305635 scale = 55.082436
linear_110 : max = 7.531228 threshold = 5.731162 scale = 22.159557
linear_111 : max = 10.528083 threshold = 2.259322 scale = 56.211544
linear_112 : max = 8.148807 threshold = 5.500842 scale = 23.087374
linear_113 : max = 8.592566 threshold = 1.948851 scale = 65.166611
linear_115 : max = 8.437109 threshold = 5.608947 scale = 22.642395
linear_114 : max = 8.437109 threshold = 6.193942 scale = 20.503904
linear_116 : max = 3.966980 threshold = 3.200896 scale = 39.676392
linear_117 : max = 9.451303 threshold = 6.061664 scale = 20.951344
linear_118 : max = 12.077262 threshold = 3.965800 scale = 32.023804
linear_119 : max = 9.671615 threshold = 4.847613 scale = 26.198460
linear_120 : max = 8.625638 threshold = 3.131427 scale = 40.556595
linear_122 : max = 10.274080 threshold = 4.888716 scale = 25.978189
linear_121 : max = 10.274080 threshold = 5.420480 scale = 23.429659
linear_123 : max = 4.826197 threshold = 3.599617 scale = 35.281532
linear_124 : max = 11.396383 threshold = 7.325849 scale = 17.335875
linear_125 : max = 9.337198 threshold = 3.941410 scale = 32.221970
linear_126 : max = 9.699965 threshold = 4.842878 scale = 26.224073
linear_127 : max = 8.775370 threshold = 3.884215 scale = 32.696438
linear_129 : max = 9.872276 threshold = 4.837319 scale = 26.254213
linear_128 : max = 9.872276 threshold = 7.180057 scale = 17.687883
linear_130 : max = 4.150427 threshold = 3.454298 scale = 36.765789
linear_131 : max = 11.112692 threshold = 7.924847 scale = 16.025545
linear_132 : max = 11.852893 threshold = 3.116593 scale = 40.749626
linear_133 : max = 11.517084 threshold = 5.024665 scale = 25.275314
linear_134 : max = 10.683807 threshold = 3.878618 scale = 32.743618
linear_136 : max = 12.421055 threshold = 6.322729 scale = 20.086264
linear_135 : max = 12.421055 threshold = 5.309880 scale = 23.917679
linear_137 : max = 4.827781 threshold = 3.744595 scale = 33.915554
linear_138 : max = 14.422395 threshold = 7.742882 scale = 16.402161
linear_139 : max = 8.527538 threshold = 3.866123 scale = 32.849449
linear_140 : max = 12.128619 threshold = 4.657793 scale = 27.266134
linear_141 : max = 9.839593 threshold = 3.845993 scale = 33.021378
linear_143 : max = 12.442304 threshold = 7.099039 scale = 17.889746
linear_142 : max = 12.442304 threshold = 5.325038 scale = 23.849592
linear_144 : max = 5.929444 threshold = 5.618206 scale = 22.605080
linear_145 : max = 13.382126 threshold = 9.321095 scale = 13.625010
linear_146 : max = 9.894987 threshold = 3.867645 scale = 32.836517
linear_147 : max = 10.915313 threshold = 4.906028 scale = 25.886522
linear_148 : max = 9.614287 threshold = 3.908151 scale = 32.496181
linear_150 : max = 11.724932 threshold = 4.485588 scale = 28.312899
linear_149 : max = 11.724932 threshold = 5.161146 scale = 24.606939
linear_151 : max = 7.164453 threshold = 5.847355 scale = 21.719223
linear_152 : max = 13.086471 threshold = 5.984121 scale = 21.222834
linear_153 : max = 11.099524 threshold = 3.991601 scale = 31.816805
linear_154 : max = 10.054585 threshold = 4.489706 scale = 28.286930
linear_155 : max = 12.389185 threshold = 3.100321 scale = 40.963501
linear_157 : max = 9.982999 threshold = 5.154796 scale = 24.637253
linear_156 : max = 9.982999 threshold = 8.537706 scale = 14.875190
linear_158 : max = 8.420287 threshold = 6.502287 scale = 19.531588
linear_159 : max = 25.014746 threshold = 9.423280 scale = 13.477261
linear_160 : max = 45.633553 threshold = 5.715335 scale = 22.220921
linear_161 : max = 20.371849 threshold = 5.117830 scale = 24.815203
linear_162 : max = 12.492933 threshold = 3.126283 scale = 40.623318
linear_164 : max = 20.697504 threshold = 4.825712 scale = 26.317358
linear_163 : max = 20.697504 threshold = 5.078367 scale = 25.008038
linear_165 : max = 9.023975 threshold = 6.836278 scale = 18.577358
linear_166 : max = 34.860619 threshold = 7.259792 scale = 17.493614
linear_167 : max = 30.380934 threshold = 5.496160 scale = 23.107042
linear_168 : max = 20.691216 threshold = 4.733317 scale = 26.831076
linear_169 : max = 9.723948 threshold = 3.952728 scale = 32.129707
linear_171 : max = 21.034811 threshold = 5.366547 scale = 23.665123
linear_170 : max = 21.034811 threshold = 5.356277 scale = 23.710501
linear_172 : max = 10.556884 threshold = 5.729481 scale = 22.166058
linear_173 : max = 20.033039 threshold = 10.207264 scale = 12.442120
linear_174 : max = 11.597379 threshold = 2.658676 scale = 47.768131
----------joiner----------
linear_2 : max = 19.293503 threshold = 14.305265 scale = 8.877850
linear_1 : max = 10.812222 threshold = 8.766452 scale = 14.487047
linear_3 : max = 0.999999 threshold = 0.999755 scale = 127.031174
ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...

View File

@ -0,0 +1,44 @@
Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
num encoder conv layers: 28
num joiner conv layers: 3
num files: 3
Processing ../test_wavs/1089-134686-0001.wav
Processing ../test_wavs/1221-135766-0001.wav
Processing ../test_wavs/1221-135766-0002.wav
Processing ../test_wavs/1089-134686-0001.wav
Processing ../test_wavs/1221-135766-0001.wav
Processing ../test_wavs/1221-135766-0002.wav
----------encoder----------
conv_15 : max = 15.942385 threshold = 15.930708 scale = 7.972025
conv_16 : max = 44.978855 threshold = 17.031788 scale = 7.456645
conv_17 : max = 17.868437 threshold = 7.830528 scale = 16.218575
linear_18 : max = 3.107259 threshold = 1.194808 scale = 106.293236
linear_19 : max = 6.193777 threshold = 4.634748 scale = 27.401705
linear_20 : max = 9.259933 threshold = 2.606617 scale = 48.722160
linear_21 : max = 5.186600 threshold = 4.790260 scale = 26.512129
linear_22 : max = 9.759041 threshold = 2.265832 scale = 56.050053
linear_23 : max = 3.931209 threshold = 3.099090 scale = 40.979767
linear_24 : max = 10.324160 threshold = 2.215561 scale = 57.321835
linear_25 : max = 3.800708 threshold = 3.599352 scale = 35.284134
linear_26 : max = 10.492444 threshold = 3.153369 scale = 40.274391
linear_27 : max = 3.660161 threshold = 2.720994 scale = 46.674126
linear_28 : max = 9.415265 threshold = 3.174434 scale = 40.007133
linear_29 : max = 4.038418 threshold = 3.118534 scale = 40.724262
linear_30 : max = 10.072084 threshold = 3.936867 scale = 32.259155
linear_31 : max = 4.342712 threshold = 3.599489 scale = 35.282787
linear_32 : max = 11.340535 threshold = 3.120308 scale = 40.701103
linear_33 : max = 3.846987 threshold = 3.630030 scale = 34.985939
linear_34 : max = 10.686298 threshold = 2.204571 scale = 57.607586
linear_35 : max = 4.904821 threshold = 4.575518 scale = 27.756420
linear_36 : max = 11.806659 threshold = 2.585589 scale = 49.118401
linear_37 : max = 6.402340 threshold = 5.047157 scale = 25.162680
linear_38 : max = 11.174589 threshold = 1.923361 scale = 66.030258
linear_39 : max = 16.178576 threshold = 7.556058 scale = 16.807705
linear_40 : max = 12.901954 threshold = 5.301267 scale = 23.956539
linear_41 : max = 14.839805 threshold = 7.597429 scale = 16.716181
linear_42 : max = 10.178945 threshold = 2.651595 scale = 47.895699
----------joiner----------
linear_2 : max = 24.829245 threshold = 16.627592 scale = 7.637907
linear_1 : max = 10.746186 threshold = 5.255032 scale = 24.167313
linear_3 : max = 1.000000 threshold = 0.999756 scale = 127.031013
ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...

View File

@ -0,0 +1,7 @@
2023-01-11 14:02:12,216 INFO [streaming-ncnn-decode.py:320] {'tokens': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav'}
T 51 32
2023-01-11 14:02:13,141 INFO [streaming-ncnn-decode.py:328] Constructing Fbank computer
2023-01-11 14:02:13,151 INFO [streaming-ncnn-decode.py:331] Reading sound files: ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
2023-01-11 14:02:13,176 INFO [streaming-ncnn-decode.py:336] torch.Size([106000])
2023-01-11 14:02:17,581 INFO [streaming-ncnn-decode.py:380] ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
2023-01-11 14:02:17,581 INFO [streaming-ncnn-decode.py:381] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS

View File

@ -0,0 +1,6 @@
2023-02-17 11:37:30,861 INFO [streaming-ncnn-decode.py:255] {'tokens': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav'}
2023-02-17 11:37:31,425 INFO [streaming-ncnn-decode.py:263] Constructing Fbank computer
2023-02-17 11:37:31,427 INFO [streaming-ncnn-decode.py:266] Reading sound files: ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
2023-02-17 11:37:31,431 INFO [streaming-ncnn-decode.py:271] torch.Size([106000])
2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:342] ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:343] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS

View File

@ -0,0 +1,7 @@
2023-02-27 20:43:40,283 INFO [streaming-ncnn-decode.py:349] {'tokens': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav'}
2023-02-27 20:43:41,260 INFO [streaming-ncnn-decode.py:357] Constructing Fbank computer
2023-02-27 20:43:41,264 INFO [streaming-ncnn-decode.py:360] Reading sound files: ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
2023-02-27 20:43:41,269 INFO [streaming-ncnn-decode.py:365] torch.Size([106000])
2023-02-27 20:43:41,280 INFO [streaming-ncnn-decode.py:372] number of states: 35
2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:410] ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:411] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS

View File

@ -0,0 +1,753 @@
.. _export_conv_emformer_transducer_models_to_ncnn:
Export ConvEmformer transducer models to ncnn
=============================================
We use the pre-trained model from the following repository as an example:
- `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
.. hint::
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
.. caution::
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
may ``not`` work.
1. Download the pre-trained model
---------------------------------
.. hint::
You can also refer to `<https://k2-fsa.github.io/sherpa/cpp/pretrained_models/online_transducer.html#icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_ to download the pre-trained model.
You have to install `git-lfs`_ before you continue.
.. code-block:: bash
cd egs/librispeech/ASR
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
git lfs pull --include "data/lang_bpe_500/bpe.model"
cd ..
.. note::
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
In the above code, we downloaded the pre-trained model into the directory
``egs/librispeech/ASR/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05``.
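To confirm that ``git lfs pull`` fetched the actual files rather than tiny
pointer stubs, you can check the file sizes (the ``.pt`` checkpoint should be
a few hundred MB):
.. code-block:: bash
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/bpe.model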
.. _export_for_ncnn_install_ncnn_and_pnnx:
2. Install ncnn and pnnx
------------------------
.. code-block:: bash
# We put ncnn into $HOME/open-source/ncnn
# You can change it to anywhere you like
cd $HOME
mkdir -p open-source
cd open-source
git clone https://github.com/csukuangfj/ncnn
cd ncnn
git submodule update --recursive --init
# Note: We don't use "python setup.py install" or "pip install ." here
mkdir -p build-wheel
cd build-wheel
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DNCNN_PYTHON=ON \
-DNCNN_BUILD_BENCHMARK=OFF \
-DNCNN_BUILD_EXAMPLES=OFF \
-DNCNN_BUILD_TOOLS=ON \
..
make -j4
cd ..
# Note: $PWD here is $HOME/open-source/ncnn
export PYTHONPATH=$PWD/python:$PYTHONPATH
export PATH=$PWD/tools/pnnx/build/src:$PATH
export PATH=$PWD/build-wheel/tools/quantize:$PATH
# Now build pnnx
cd tools/pnnx
mkdir build
cd build
cmake ..
make -j4
./src/pnnx
Congratulations! You have successfully installed the following components:
- ``pnnx``, which is an executable located in
``$HOME/open-source/ncnn/tools/pnnx/build/src``. We will use
it to convert models exported by ``torch.jit.trace()``.
- ``ncnn2int8``, which is an executable located in
``$HOME/open-source/ncnn/build-wheel/tools/quantize``. We will use
it to quantize our models to ``int8``.
- ``ncnn.cpython-38-x86_64-linux-gnu.so``, which is a Python module located
in ``$HOME/open-source/ncnn/python/ncnn``.
.. note::
I am using ``Python 3.8``, so it
is ``ncnn.cpython-38-x86_64-linux-gnu.so``. If you use a different
version, say, ``Python 3.9``, the name would be
``ncnn.cpython-39-x86_64-linux-gnu.so``.
If you are not using Linux, the file name will be different as well.
But that does not matter. As long as you can compile it, it should work.
We have set up ``PYTHONPATH`` so that you can use ``import ncnn`` in your
Python code. We have also set up ``PATH`` so that you can use
``pnnx`` and ``ncnn2int8`` later in your terminal.
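To double-check the setup, you can run the following quick sanity check. It is
a minimal sketch; the exact paths printed on your machine will differ:
.. code-block:: bash
# Both executables should be found via PATH
which pnnx
which ncnn2int8
# The Python module should be importable via PYTHONPATH
python3 -c "import ncnn; print(ncnn.__file__)"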
.. caution::
Please don't use `<https://github.com/tencent/ncnn>`_.
We have made some modifications to the official `ncnn`_.
We will synchronize `<https://github.com/csukuangfj/ncnn>`_ periodically
with the official one.
3. Export the model via torch.jit.trace()
-----------------------------------------
First, let us rename our pre-trained model:
.. code-block::
cd egs/librispeech/ASR
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-30.pt
cd ../..
Next, we use the following code to export our model:
.. code-block:: bash
dir=./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
--exp-dir $dir/exp \
--bpe-model $dir/data/lang_bpe_500/bpe.model \
--epoch 30 \
--avg 1 \
--use-averaged-model 0 \
\
--num-encoder-layers 12 \
--chunk-length 32 \
--cnn-module-kernel 31 \
--left-context-length 32 \
--right-context-length 8 \
--memory-size 32 \
--encoder-dim 512
.. caution::
If your model has different configuration parameters, please change them accordingly.
.. hint::
We have renamed our model to ``epoch-30.pt`` so that we can use ``--epoch 30``.
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
If you have trained a model by yourself and if you have all checkpoints
available, please first use ``decode.py`` to tune ``--epoch --avg``
and select the best combination with ``--use-averaged-model 1``.
.. note::
You will see the following log output:
.. literalinclude:: ./code/export-conv-emformer-transducer-for-ncnn-output.txt
The log shows the model has ``75490012`` parameters, i.e., ``~75 M``.
.. code-block::
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
-rw-r--r-- 1 kuangfangjun root 289M Jan 11 12:05 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
You can see that the file size of the pre-trained model is ``289 MB``, which
is roughly equal to ``75490012*4/1024/1024 = 287.97 MB``.
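If you want to double-check this arithmetic, you can do it from the shell:
.. code-block:: bash
# 75490012 float32 parameters, 4 bytes each, converted to MB
python3 -c "print(75490012 * 4 / 1024 / 1024)"
# prints 287.97...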
After running ``conv_emformer_transducer_stateless2/export-for-ncnn.py``,
we will get the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*pnnx*
-rw-r--r-- 1 kuangfangjun root 1010K Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.pt
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.pt
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.pt
.. _conv-emformer-step-4-export-torchscript-model-via-pnnx:
4. Export torchscript model via pnnx
------------------------------------
.. hint::
Make sure you have set up the ``PATH`` environment variable. Otherwise,
it will throw an error saying that ``pnnx`` could not be found.
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
.. code-block::
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
pnnx ./encoder_jit_trace-pnnx.pt
pnnx ./decoder_jit_trace-pnnx.pt
pnnx ./joiner_jit_trace-pnnx.pt
It will generate the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*ncnn*{bin,param}
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 142M Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 1.5M Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
There are two types of files:
- ``param``: It is a text file containing the model architectures. You can
use a text editor to view its content.
- ``bin``: It is a binary file containing the model parameters.
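You can verify the two file types from the command line; a quick check,
assuming you are inside the ``exp`` directory of the downloaded model:
.. code-block:: bash
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
file encoder_jit_trace-pnnx.ncnn.param  # reported as ASCII text
file encoder_jit_trace-pnnx.ncnn.bin    # reported as binary data
head -n 3 encoder_jit_trace-pnnx.ncnn.param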
Below, we compare the file sizes of the models before and after conversion via ``pnnx``:
.. see https://tableconvert.com/restructuredtext-generator
+----------------------------------+------------+
| File name | File size |
+==================================+============+
| encoder_jit_trace-pnnx.pt | 283 MB |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1010 KB |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 3.0 MB |
+----------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin | 142 MB |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
+----------------------------------+------------+
You can see that the file sizes of the converted models are about half of
those before conversion:
- encoder: 283 MB vs 142 MB
- decoder: 1010 KB vs 503 KB
- joiner: 3.0 MB vs 1.5 MB
The reason is that by default ``pnnx`` converts ``float32`` parameters
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
for ``float16``. Thus, the converted model is roughly half the original size.
.. hint::
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
won't convert ``float32`` to ``float16``.
5. Test the exported models in icefall
--------------------------------------
.. note::
We assume you have set up the environment variable ``PYTHONPATH`` when
building `ncnn`_.
Now we have successfully converted our pre-trained model to `ncnn`_ format.
The generated 6 files are what we need. You can use the following code to
test the converted models:
.. code-block:: bash
./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
--tokens ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt \
--encoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin \
./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
.. hint::
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
only 1 wave file as input.
The output is given below:
.. literalinclude:: ./code/test-streaming-ncnn-decode-conv-emformer-transducer-libri.txt
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
.. _conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn:
6. Modify the exported encoder for sherpa-ncnn
----------------------------------------------
In order to use the exported models in `sherpa-ncnn`_, we have to modify
``encoder_jit_trace-pnnx.ncnn.param``.
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
.. code-block::
7767517
1060 1342
Input in0 0 1 in0
**Explanation** of the above three lines:
1. ``7767517``, it is a magic number and should not be changed.
2. ``1060 1342``, the first number ``1060`` specifies the number of layers
in this file, while ``1342`` specifies the number of intermediate outputs
of this file.
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
is the layer name of this layer; ``0`` means this layer has no input;
``1`` means this layer has one output; ``in0`` is the output name of
this layer.
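You can print these lines yourself (run from the directory containing the
``param`` file):

.. code-block:: bash

   head -n 3 encoder_jit_trace-pnnx.ncnn.param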
We need to add 1 extra line and also increment the number of layers.
The result looks like below:
.. code-block:: bash
7767517
1061 1342
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
Input in0 0 1 in0
**Explanation**
1. ``7767517``, it is still the same
2. ``1061 1342``, we have added an extra layer, so we need to update ``1060`` to ``1061``.
We don't need to change ``1342`` since the newly added layer has no inputs or outputs.
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512``
This line is newly added. Its explanation is given below:
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
- ``0 0`` means this layer has no inputs or output. Must be ``0 0``.
- ``0=1``, 0 is the key and 1 is the value. MUST be ``0=1``.
- ``1=12``, 1 is the key and 12 is the value of the
parameter ``--num-encoder-layers`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
- ``2=32``, 2 is the key and 32 is the value of the
parameter ``--memory-size`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
- ``3=31``, 3 is the key and 31 is the value of the
parameter ``--cnn-module-kernel`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
- ``4=8``, 4 is the key and 8 is the value of the
parameter ``--left-context-length`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
- ``5=32``, 5 is the key and 32 is the value of the
parameter ``--chunk-length`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
- ``6=8``, 6 is the key and 8 is the value of the
parameter ``--right-context-length`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
- ``7=512``, 7 is the key and 512 is the value of the
parameter ``--encoder-dim`` that you provided when running
``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
For ease of reference, we list the key-value pairs that you need to add
in the following table. If your model has a different setting, please
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
will be ``SAD``.
+------+-----------------------------+
| key | value |
+======+=============================+
| 0 | 1 (fixed) |
+------+-----------------------------+
| 1 | ``--num-encoder-layers`` |
+------+-----------------------------+
| 2 | ``--memory-size`` |
+------+-----------------------------+
| 3 | ``--cnn-module-kernel`` |
+------+-----------------------------+
| 4 | ``--left-context-length`` |
+------+-----------------------------+
| 5 | ``--chunk-length`` |
+------+-----------------------------+
| 6 | ``--right-context-length`` |
+------+-----------------------------+
| 7 | ``--encoder-dim`` |
+------+-----------------------------+
4. ``Input in0 0 1 in0``. No need to change it.
.. caution::
When you add a new layer ``SherpaMetaData``, please remember to update the
number of layers. In our case, update ``1060`` to ``1061``. Otherwise,
you will be SAD later.
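If you prefer to script this edit rather than use a text editor, here is a
minimal sketch. It assumes GNU ``sed``, that the second line starts with
``1060``, and that only one line in the file starts with ``Input``; adjust
the counts and the ``SherpaMetaData`` values to your own model.

.. code-block:: bash

   cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/

   # Bump the layer count on the second line: 1060 -> 1061
   sed -i '2s/^1060/1061/' encoder_jit_trace-pnnx.ncnn.param

   # Insert the SherpaMetaData line right before the Input layer
   sed -i '/^Input/i SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512' encoder_jit_trace-pnnx.ncnn.param

   # Verify the first three lines
   head -n 3 encoder_jit_trace-pnnx.ncnn.param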
.. hint::
After adding the new layer ``SherpaMetaData``, you cannot use this model
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
supported only in `sherpa-ncnn`_.
.. hint::
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
the ``param`` file! You don't need to change the ``bin`` file.
Now you can use this model in `sherpa-ncnn`_.
Please refer to the following documentation:
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
You can find more usages there.
7. (Optional) int8 quantization with sherpa-ncnn
------------------------------------------------
This step is optional.
In this step, we describe how to quantize our model with ``int8``.
Change :ref:`conv-emformer-step-4-export-torchscript-model-via-pnnx` to
disable ``fp16`` when using ``pnnx``:
.. code-block::
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
pnnx ./decoder_jit_trace-pnnx.pt
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
.. note::
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
support quantizing the decoder model yet. We will update this documentation
once `ncnn`_ supports it (perhaps in 2023).
It will generate the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*_jit_trace-pnnx.ncnn.{param,bin}
-rw-r--r-- 1 kuangfangjun root 503K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 437 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 283M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 79K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 488 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
Let us compare again the file sizes:
+----------------------------------------+------------+
| File name | File size |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.pt | 283 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1010 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 3.0 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
+----------------------------------------+------------+
You can see that the file sizes are doubled when we disable ``fp16``.
.. note::
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
Next, follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
Change
.. code-block:: bash
7767517
1060 1342
Input in0 0 1 in0
to
.. code-block:: bash
7767517
1061 1342
SherpaMetaData sherpa_meta_data1 0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
Input in0 0 1 in0
.. caution::
Please follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
to change the values for ``SherpaMetaData`` if your model uses a different setting.
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
`sherpa-ncnn`_.
.. code-block:: bash
# We will download sherpa-ncnn to $HOME/open-source/
# You can change it to anywhere you like.
cd $HOME
mkdir -p open-source
cd open-source
git clone https://github.com/k2-fsa/sherpa-ncnn
cd sherpa-ncnn
mkdir build
cd build
cmake ..
make -j 4
./bin/generate-int8-scale-table
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
The output of the above commands is:
.. code-block:: bash
(py38) kuangfangjun:build$ generate-int8-scale-table
Please provide 10 arg. Currently given: 1
Usage:
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
We need to create a file ``wave_filenames.txt``, in which we put
some calibration wave files. For testing purposes, we use the ``test_wavs``
from the pre-trained model repository `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
cat <<EOF > wave_filenames.txt
../test_wavs/1089-134686-0001.wav
../test_wavs/1221-135766-0001.wav
../test_wavs/1221-135766-0002.wav
EOF
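You can optionally check that each calibration wave is a 16 kHz mono file, as
the tool expects. A small sketch, assuming ``soxi`` from ``sox`` is installed:

.. code-block:: bash

   # Print the sample rate and channel count of every calibration wave.
   while read -r w; do
     echo "$w: $(soxi -r "$w") Hz, $(soxi -c "$w") channel(s)"
   done < wave_filenames.txt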
Now we can calculate the scales needed for quantization with the calibration data:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
generate-int8-scale-table \
./encoder_jit_trace-pnnx.ncnn.param \
./encoder_jit_trace-pnnx.ncnn.bin \
./decoder_jit_trace-pnnx.ncnn.param \
./decoder_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
./encoder-scale-table.txt \
./joiner-scale-table.txt \
./wave_filenames.txt
The output logs are given below:
.. literalinclude:: ./code/generate-int-8-scale-table-for-conv-emformer.txt
It generates the following two files:
.. code-block:: bash
$ ls -lh encoder-scale-table.txt joiner-scale-table.txt
-rw-r--r-- 1 kuangfangjun root 955K Jan 11 17:28 encoder-scale-table.txt
-rw-r--r-- 1 kuangfangjun root 18K Jan 11 17:28 joiner-scale-table.txt
.. caution::
In practice, you need more calibration data to compute an accurate scale table.
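If you want to build a larger calibration list from your own data, something
like the following works (the directory path is only an illustration):

.. code-block:: bash

   # Collect all of your own 16 kHz mono waves into the calibration list.
   find /path/to/your/calibration/wavs -name '*.wav' > wave_filenames.txt
   wc -l wave_filenames.txt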
Finally, let us use the scale table to quantize our models into ``int8``.
.. code-block:: bash
ncnn2int8
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
First, we quantize the encoder model:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
ncnn2int8 \
./encoder_jit_trace-pnnx.ncnn.param \
./encoder_jit_trace-pnnx.ncnn.bin \
./encoder_jit_trace-pnnx.ncnn.int8.param \
./encoder_jit_trace-pnnx.ncnn.int8.bin \
./encoder-scale-table.txt
Next, we quantize the joiner model:
.. code-block:: bash
ncnn2int8 \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.int8.param \
./joiner_jit_trace-pnnx.ncnn.int8.bin \
./joiner-scale-table.txt
The above two commands generate the following 4 files:
.. code-block:: bash
-rw-r--r-- 1 kuangfangjun root 99M Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.bin
-rw-r--r-- 1 kuangfangjun root 78K Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.param
-rw-r--r-- 1 kuangfangjun root 774K Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.bin
-rw-r--r-- 1 kuangfangjun root 496 Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.param
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
.. caution::
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
replace the following invocation:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
sherpa-ncnn \
../data/lang_bpe_500/tokens.txt \
./encoder_jit_trace-pnnx.ncnn.param \
./encoder_jit_trace-pnnx.ncnn.bin \
./decoder_jit_trace-pnnx.ncnn.param \
./decoder_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
../test_wavs/1089-134686-0001.wav
with
.. code-block::
cd egs/librispeech/ASR
cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
sherpa-ncnn \
../data/lang_bpe_500/tokens.txt \
./encoder_jit_trace-pnnx.ncnn.int8.param \
./encoder_jit_trace-pnnx.ncnn.int8.bin \
./decoder_jit_trace-pnnx.ncnn.param \
./decoder_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
../test_wavs/1089-134686-0001.wav
The following table compares again the file sizes:
+----------------------------------------+------------+
| File name | File size |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.pt | 283 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1010 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 3.0 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.int8.bin | 99 MB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
+----------------------------------------+------------+
You can see that the file sizes of the model after ``int8`` quantization
are much smaller.
.. hint::
Currently, only linear layers and convolutional layers are quantized
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
.. note::
You need to test the recognition accuracy after ``int8`` quantization.
You can find the speed comparison at `<https://github.com/k2-fsa/sherpa-ncnn/issues/44>`_.
That's it! Have fun with `sherpa-ncnn`_!
View File
@ -0,0 +1,644 @@
.. _export_lstm_transducer_models_to_ncnn:
Export LSTM transducer models to ncnn
-------------------------------------
We use the pre-trained model from the following repository as an example:
`<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
.. hint::
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
.. caution::
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
may ``not`` work.
1. Download the pre-trained model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. hint::
You have to install `git-lfs`_ before you continue.
.. code-block:: bash
cd egs/librispeech/ASR
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
git lfs pull --include "data/lang_bpe_500/bpe.model"
cd ..
.. note::
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
In the above code, we downloaded the pre-trained model into the directory
``egs/librispeech/ASR/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03``.
2. Install ncnn and pnnx
^^^^^^^^^^^^^^^^^^^^^^^^
Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
3. Export the model via torch.jit.trace()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
First, let us rename our pre-trained model:
.. code-block::
cd egs/librispeech/ASR
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
cd ../..
Next, we use the following code to export our model:
.. code-block:: bash
dir=./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
./lstm_transducer_stateless2/export-for-ncnn.py \
--exp-dir $dir/exp \
--bpe-model $dir/data/lang_bpe_500/bpe.model \
--epoch 99 \
--avg 1 \
--use-averaged-model 0 \
--num-encoder-layers 12 \
--encoder-dim 512 \
--rnn-hidden-size 1024
.. hint::
We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
If you have trained a model by yourself and if you have all checkpoints
available, please first use ``decode.py`` to tune ``--epoch --avg``
and select the best combination with ``--use-averaged-model 1``.
.. note::
You will see the following log output:
.. literalinclude:: ./code/export-lstm-transducer-for-ncnn-output.txt
The log shows the model has ``84176356`` parameters, i.e., ``~84 M``.
.. code-block::
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
-rw-r--r-- 1 kuangfangjun root 324M Feb 17 10:34 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
You can see that the file size of the pre-trained model is ``324 MB``, which
is roughly equal to ``84176356*4/1024/1024 = 321.107 MB``.
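You can reproduce this back-of-the-envelope computation in the shell; each
``float32`` parameter occupies 4 bytes:

.. code-block:: bash

   python3 -c 'print(84176356 * 4 / 1024 / 1024)'
   # prints about 321.107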
After running ``lstm_transducer_stateless2/export-for-ncnn.py``,
we will get the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*pnnx.pt
-rw-r--r-- 1 kuangfangjun root 1010K Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
-rw-r--r-- 1 kuangfangjun root 318M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
-rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
.. _lstm-transducer-step-4-export-torchscript-model-via-pnnx:
4. Export torchscript model via pnnx
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. hint::
Make sure you have set up the ``PATH`` environment variable
in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
it will throw an error saying that ``pnnx`` could not be found.
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
.. code-block::
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
pnnx ./encoder_jit_trace-pnnx.pt
pnnx ./decoder_jit_trace-pnnx.pt
pnnx ./joiner_jit_trace-pnnx.pt
It will generate the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*ncnn*{bin,param}
-rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 159M Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 1.5M Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 488 Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
There are two types of files:
- ``param``: It is a text file containing the model architectures. You can
use a text editor to view its content.
- ``bin``: It is a binary file containing the model parameters.
The table below compares the file sizes of the models before and after conversion via ``pnnx``:
.. see https://tableconvert.com/restructuredtext-generator
+----------------------------------+------------+
| File name | File size |
+==================================+============+
| encoder_jit_trace-pnnx.pt | 318 MB |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1010 KB |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 3.0 MB |
+----------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin | 159 MB |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin | 503 KB |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin | 1.5 MB |
+----------------------------------+------------+
You can see that the file sizes of the models after conversion are about half
those of the models before conversion:
- encoder: 318 MB vs 159 MB
- decoder: 1010 KB vs 503 KB
- joiner: 3.0 MB vs 1.5 MB
The reason is that by default ``pnnx`` converts ``float32`` parameters
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
for ``float16``. Thus, the converted model is roughly half the original size.
.. hint::
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
won't convert ``float32`` to ``float16``.
5. Test the exported models in icefall
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. note::
We assume you have set up the environment variable ``PYTHONPATH`` when
building `ncnn`_.
Now we have successfully converted our pre-trained model to `ncnn`_ format.
The generated 6 files are what we need. You can use the following code to
test the converted models:
.. code-block:: bash
python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
--tokens ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt \
--encoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin \
./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
.. hint::
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
only 1 wave file as input.
The output is given below:
.. literalinclude:: ./code/test-streaming-ncnn-decode-lstm-transducer-libri.txt
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
.. _lstm-modify-the-exported-encoder-for-sherpa-ncnn:
6. Modify the exported encoder for sherpa-ncnn
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In order to use the exported models in `sherpa-ncnn`_, we have to modify
``encoder_jit_trace-pnnx.ncnn.param``.
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
.. code-block::
7767517
267 379
Input in0 0 1 in0
**Explanation** of the above three lines:
1. ``7767517``, it is a magic number and should not be changed.
2. ``267 379``, the first number ``267`` specifies the number of layers
in this file, while ``379`` specifies the number of intermediate outputs
of this file.
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
is the layer name of this layer; ``0`` means this layer has no input;
``1`` means this layer has one output; ``in0`` is the output name of
this layer.
We need to add 1 extra line and also increment the number of layers.
The result looks like below:
.. code-block:: bash
7767517
268 379
SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024
Input in0 0 1 in0
**Explanation**
1. ``7767517``, it is still the same
2. ``268 379``, we have added an extra layer, so we need to update ``267`` to ``268``.
We don't need to change ``379`` since the newly added layer has no inputs or outputs.
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024``
This line is newly added. Its explanation is given below:
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
- ``0 0`` means this layer has no inputs or output. Must be ``0 0``.
- ``0=3``, 0 is the key and 3 is the value. MUST be ``0=3``.
- ``1=12``, 1 is the key and 12 is the value of the
parameter ``--num-encoder-layers`` that you provided when running
``./lstm_transducer_stateless2/export-for-ncnn.py``.
- ``2=512``, 2 is the key and 512 is the value of the
parameter ``--encoder-dim`` that you provided when running
``./lstm_transducer_stateless2/export-for-ncnn.py``.
- ``3=1024``, 3 is the key and 1024 is the value of the
parameter ``--rnn-hidden-size`` that you provided when running
``./lstm_transducer_stateless2/export-for-ncnn.py``.
For ease of reference, we list the key-value pairs that you need to add
in the following table. If your model has a different setting, please
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
will be ``SAD``.
+------+-----------------------------+
| key | value |
+======+=============================+
| 0 | 3 (fixed) |
+------+-----------------------------+
| 1 | ``--num-encoder-layers`` |
+------+-----------------------------+
| 2 | ``--encoder-dim`` |
+------+-----------------------------+
| 3 | ``--rnn-hidden-size`` |
+------+-----------------------------+
4. ``Input in0 0 1 in0``. No need to change it.
.. caution::
When you add a new layer ``SherpaMetaData``, please remember to update the
number of layers. In our case, update ``267`` to ``268``. Otherwise,
you will be SAD later.
.. hint::
After adding the new layer ``SherpaMetaData``, you cannot use this model
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
supported only in `sherpa-ncnn`_.
.. hint::
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
the ``param`` file! You don't need to change the ``bin`` file.
Now you can use this model in `sherpa-ncnn`_.
Please refer to the following documentation:
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
You can find more usages there.
7. (Optional) int8 quantization with sherpa-ncnn
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This step is optional.
In this step, we describe how to quantize our model with ``int8``.
Change :ref:`lstm-transducer-step-4-export-torchscript-model-via-pnnx` to
disable ``fp16`` when using ``pnnx``:
.. code-block::
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
pnnx ./encoder_jit_trace-pnnx.pt fp16=0
pnnx ./decoder_jit_trace-pnnx.pt
pnnx ./joiner_jit_trace-pnnx.pt fp16=0
.. note::
We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
support quantizing the decoder model yet. We will update this documentation
once `ncnn`_ supports it (perhaps in 2023).
It will generate the following files:

.. code-block:: bash
ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*_jit_trace-pnnx.ncnn.{param,bin}
-rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 317M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 488 Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
Let us compare again the file sizes:
+----------------------------------------+------------+
| File name | File size |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.pt | 318 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1010 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 3.0 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
+----------------------------------------+------------+
You can see that the file sizes are doubled when we disable ``fp16``.
.. note::
You can again use ``streaming-ncnn-decode.py`` to test the exported models.
Next, follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
to modify ``encoder_jit_trace-pnnx.ncnn.param``.
Change
.. code-block:: bash
7767517
267 379
Input in0 0 1 in0
to
.. code-block:: bash
7767517
268 379
SherpaMetaData sherpa_meta_data1 0 0 0=3 1=12 2=512 3=1024
Input in0 0 1 in0
.. caution::
Please follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
to change the values for ``SherpaMetaData`` if your model uses a different setting.
Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
`sherpa-ncnn`_.
.. code-block:: bash
# We will download sherpa-ncnn to $HOME/open-source/
# You can change it to anywhere you like.
cd $HOME
mkdir -p open-source
cd open-source
git clone https://github.com/k2-fsa/sherpa-ncnn
cd sherpa-ncnn
mkdir build
cd build
cmake ..
make -j 4
./bin/generate-int8-scale-table
export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
The output of the above commands is:
.. code-block:: bash
(py38) kuangfangjun:build$ generate-int8-scale-table
Please provide 10 arg. Currently given: 1
Usage:
generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
We need to create a file ``wave_filenames.txt``, in which we put
some calibration wave files. For testing purposes, we use the ``test_wavs``
from the pre-trained model repository
`<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
cat <<EOF > wave_filenames.txt
../test_wavs/1089-134686-0001.wav
../test_wavs/1221-135766-0001.wav
../test_wavs/1221-135766-0002.wav
EOF
Now we can calculate the scales needed for quantization with the calibration data:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
generate-int8-scale-table \
./encoder_jit_trace-pnnx.ncnn.param \
./encoder_jit_trace-pnnx.ncnn.bin \
./decoder_jit_trace-pnnx.ncnn.param \
./decoder_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
./encoder-scale-table.txt \
./joiner-scale-table.txt \
./wave_filenames.txt
The output logs are given below:
.. literalinclude:: ./code/generate-int-8-scale-table-for-lstm.txt
It generates the following two files:
.. code-block:: bash
ls -lh encoder-scale-table.txt joiner-scale-table.txt
-rw-r--r-- 1 kuangfangjun root 345K Feb 17 12:13 encoder-scale-table.txt
-rw-r--r-- 1 kuangfangjun root 17K Feb 17 12:13 joiner-scale-table.txt
.. caution::
In practice, you need more calibration data to compute an accurate scale table.
Finally, let us use the scale table to quantize our models into ``int8``.
.. code-block:: bash
ncnn2int8
usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
First, we quantize the encoder model:
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
ncnn2int8 \
./encoder_jit_trace-pnnx.ncnn.param \
./encoder_jit_trace-pnnx.ncnn.bin \
./encoder_jit_trace-pnnx.ncnn.int8.param \
./encoder_jit_trace-pnnx.ncnn.int8.bin \
./encoder-scale-table.txt
Next, we quantize the joiner model:
.. code-block:: bash
ncnn2int8 \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.int8.param \
./joiner_jit_trace-pnnx.ncnn.int8.bin \
./joiner-scale-table.txt
The above two commands generate the following 4 files:
.. code-block::
-rw-r--r-- 1 kuangfangjun root 218M Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.bin
-rw-r--r-- 1 kuangfangjun root 21K Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.param
-rw-r--r-- 1 kuangfangjun root 774K Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.bin
-rw-r--r-- 1 kuangfangjun root 496 Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.param
Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
.. caution::
``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
replace the following invocation:
.. code-block::
cd egs/librispeech/ASR
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
sherpa-ncnn \
../data/lang_bpe_500/tokens.txt \
./encoder_jit_trace-pnnx.ncnn.param \
./encoder_jit_trace-pnnx.ncnn.bin \
./decoder_jit_trace-pnnx.ncnn.param \
./decoder_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
../test_wavs/1089-134686-0001.wav
with
.. code-block:: bash
cd egs/librispeech/ASR
cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
sherpa-ncnn \
../data/lang_bpe_500/tokens.txt \
./encoder_jit_trace-pnnx.ncnn.int8.param \
./encoder_jit_trace-pnnx.ncnn.int8.bin \
./decoder_jit_trace-pnnx.ncnn.param \
./decoder_jit_trace-pnnx.ncnn.bin \
./joiner_jit_trace-pnnx.ncnn.param \
./joiner_jit_trace-pnnx.ncnn.bin \
../test_wavs/1089-134686-0001.wav
The following table compares again the file sizes:
+----------------------------------------+------------+
| File name | File size |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.pt | 318 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1010 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 3.0 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB |
+----------------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp16) | 1.5 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin (fp32) | 3.0 MB |
+----------------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.int8.bin | 218 MB |
+----------------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.int8.bin | 774 KB |
+----------------------------------------+------------+
You can see that the file size of the joiner model after ``int8`` quantization
is much smaller. However, the size of the encoder model is even larger than
the ``fp16`` counterpart. The reason is that `ncnn`_ currently does not support
quantizing ``LSTM`` layers into ``8-bit``. Please see
`<https://github.com/Tencent/ncnn/issues/4532>`_
.. hint::
Currently, only linear layers and convolutional layers are quantized
with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
.. note::
You need to test the recognition accuracy after ``int8`` quantization.
That's it! Have fun with `sherpa-ncnn`_!
View File
@ -0,0 +1,388 @@
.. _export_streaming_zipformer_transducer_models_to_ncnn:
Export streaming Zipformer transducer models to ncnn
----------------------------------------------------
We use the pre-trained model from the following repository as an example:
`<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_
We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
.. hint::
We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
.. caution::
Please use a more recent version of PyTorch. For instance, ``torch 1.8``
may ``not`` work.
1. Download the pre-trained model
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. hint::
You have to install `git-lfs`_ before you continue.
.. code-block:: bash
cd egs/librispeech/ASR
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
git lfs pull --include "exp/pretrained.pt"
git lfs pull --include "data/lang_bpe_500/bpe.model"
cd ..
.. note::
We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
In the above code, we downloaded the pre-trained model into the directory
``egs/librispeech/ASR/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29``.
2. Install ncnn and pnnx
^^^^^^^^^^^^^^^^^^^^^^^^
Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
3. Export the model via torch.jit.trace()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
First, let us rename our pre-trained model:
.. code-block::
cd egs/librispeech/ASR
cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
ln -s pretrained.pt epoch-99.pt
cd ../..
Next, we use the following code to export our model:
.. code-block:: bash
dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
--bpe-model $dir/data/lang_bpe_500/bpe.model \
--exp-dir $dir/exp \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
\
--decode-chunk-len 32 \
--num-left-chunks 4 \
--num-encoder-layers "2,4,3,2,4" \
--feedforward-dims "1024,1024,2048,2048,1024" \
--nhead "8,8,8,8,8" \
--encoder-dims "384,384,384,384,384" \
--attention-dims "192,192,192,192,192" \
--encoder-unmasked-dims "256,256,256,256,256" \
--zipformer-downsampling-factors "1,2,4,8,2" \
--cnn-module-kernels "31,31,31,31,31" \
--decoder-dim 512 \
--joiner-dim 512
.. caution::
If your model has different configuration parameters, please change them accordingly.
.. hint::
We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
If you have trained a model by yourself and if you have all checkpoints
available, please first use ``decode.py`` to tune ``--epoch --avg``
and select the best combination with ``--use-averaged-model 1``.
.. note::
You will see the following log output:
.. literalinclude:: ./code/export-zipformer-transducer-for-ncnn-output.txt
The log shows the model has ``69920376`` parameters, i.e., ``~69.9 M``.
.. code-block:: bash
ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt
-rw-r--r-- 1 kuangfangjun root 269M Jan 12 12:53 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt
You can see that the file size of the pre-trained model is ``269 MB``, which
is roughly equal to ``69920376*4/1024/1024 = 266.725 MB``.
After running ``pruned_transducer_stateless7_streaming/export-for-ncnn.py``,
we will get the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*pnnx.pt
-rw-r--r-- 1 kuangfangjun root 1022K Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
-rw-r--r-- 1 kuangfangjun root 266M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
-rw-r--r-- 1 kuangfangjun root 2.8M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt
.. _zipformer-transducer-step-4-export-torchscript-model-via-pnnx:
4. Export torchscript model via pnnx
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. hint::
Make sure you have set up the ``PATH`` environment variable
in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
it will throw an error saying that ``pnnx`` could not be found.
Now, it's time to export our models to `ncnn`_ via ``pnnx``.
.. code-block::
cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/
pnnx ./encoder_jit_trace-pnnx.pt
pnnx ./decoder_jit_trace-pnnx.pt
pnnx ./joiner_jit_trace-pnnx.pt
It will generate the following files:
.. code-block:: bash
ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*ncnn*{bin,param}
-rw-r--r-- 1 kuangfangjun root 509K Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 437 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 133M Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 152K Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param
-rw-r--r-- 1 kuangfangjun root 1.4M Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin
-rw-r--r-- 1 kuangfangjun root 488 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param
There are two types of files:
- ``param``: It is a text file containing the model architectures. You can
use a text editor to view its content.
- ``bin``: It is a binary file containing the model parameters.
The table below compares the file sizes of the models before and after conversion via ``pnnx``:
.. see https://tableconvert.com/restructuredtext-generator
+----------------------------------+------------+
| File name | File size |
+==================================+============+
| encoder_jit_trace-pnnx.pt | 266 MB |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.pt | 1022 KB |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.pt | 2.8 MB |
+----------------------------------+------------+
| encoder_jit_trace-pnnx.ncnn.bin | 133 MB |
+----------------------------------+------------+
| decoder_jit_trace-pnnx.ncnn.bin | 509 KB |
+----------------------------------+------------+
| joiner_jit_trace-pnnx.ncnn.bin | 1.4 MB |
+----------------------------------+------------+
You can see that the file sizes of the models after conversion are about half
those of the models before conversion:
- encoder: 266 MB vs 133 MB
- decoder: 1022 KB vs 509 KB
- joiner: 2.8 MB vs 1.4 MB
The reason is that by default ``pnnx`` converts ``float32`` parameters
to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
for ``float16``. Thus, the converted model is roughly half the original size.
.. hint::
If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
won't convert ``float32`` to ``float16``.
5. Test the exported models in icefall
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. note::
We assume you have set up the environment variable ``PYTHONPATH`` when
building `ncnn`_.
Now we have successfully converted our pre-trained model to `ncnn`_ format.
The generated 6 files are what we need. You can use the following code to
test the converted models:
.. code-block:: bash
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
--tokens ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt \
--encoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin \
./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
.. hint::
`ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
only 1 wave file as input.
The output is given below:
.. literalinclude:: ./code/test-streaming-ncnn-decode-zipformer-transducer-libri.txt
Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
.. _zipformer-modify-the-exported-encoder-for-sherpa-ncnn:
6. Modify the exported encoder for sherpa-ncnn
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In order to use the exported models in `sherpa-ncnn`_, we have to modify
``encoder_jit_trace-pnnx.ncnn.param``.
Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
.. code-block::
7767517
2028 2547
Input in0 0 1 in0
**Explanation** of the above three lines:
1. ``7767517``, it is a magic number and should not be changed.
2. ``2028 2547``, the first number ``2028`` specifies the number of layers
in this file, while ``2547`` specifies the number of intermediate outputs
of this file.
3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
is the layer name of this layer; ``0`` means this layer has no input;
``1`` means this layer has one output; ``in0`` is the output name of
this layer.
We need to add 1 extra line and also increment the number of layers.
The result looks like below:
.. code-block:: bash
7767517
2029 2547
SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 15=1 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31
Input in0 0 1 in0
**Explanation**
1. ``7767517``, it is still the same
2. ``2029 2547``, we have added an extra layer, so we need to update ``2028`` to ``2029``.
We don't need to change ``2547`` since the newly added layer has no inputs or outputs.
3. ``SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 15=1 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31``
This line is newly added. Its explanation is given below:
- ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
- ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
- ``0 0`` means this layer has no inputs or output. Must be ``0 0``.
- ``0=2``, 0 is the key and 2 is the value. MUST be ``0=2``.
- ``1=32``, 1 is the key and 32 is the value of the
parameter ``--decode-chunk-len`` that you provided when running
``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
- ``2=4``, 2 is the key and 4 is the value of the
parameter ``--num-left-chunks`` that you provided when running
``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
- ``3=7``, 3 is the key and 7 is the value for the amount of padding
used in the Conv2DSubsampling layer. It should be 7 for zipformer
if you don't change zipformer.py.
- ``15=1``, attribute 15, this is the model version. Starting from
`sherpa-ncnn`_ v2.0, we require that the model version be >= 1.
- ``-23316=5,2,4,3,2,4``, attribute 16, this is an array attribute.
It is attribute 16 since -23300 - (-23316) = 16 (see the quick check after this list).
The first element of the array is the length of the array, which is 5 in our case.
``2,4,3,2,4`` is the value of ``--num-encoder-layers`` that you provided
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
- ``-23317=5,384,384,384,384,384``, attribute 17.
The first element of the array is the length of the array, which is 5 in our case.
``384,384,384,384,384`` is the value of ``--encoder-dims`` that you provided
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
- ``-23318=5,192,192,192,192,192``, attribute 18.
The first element of the array is the length of the array, which is 5 in our case.
``192,192,192,192,192`` is the value of ``--attention-dims`` that you provided
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
- ``-23319=5,1,2,4,8,2``, attribute 19.
The first element of the array is the length of the array, which is 5 in our case.
``1,2,4,8,2`` is the value of ``--zipformer-downsampling-factors`` that you provided
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
- ``-23320=5,31,31,31,31,31``, attribute 20.
The first element of the array is the length of the array, which is 5 in our case.
``31,31,31,31,31`` is the value of ``--cnn-module-kernels`` that you provided
when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
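You can verify the array-attribute numbering rule with a quick shell
computation:

.. code-block:: bash

   # key -23316 encodes array attribute 16
   echo $(( -23300 - (-23316) ))   # prints 16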
For ease of reference, we list the key-value pairs that you need to add
in the following table. If your model has a different setting, please
change the values for ``SherpaMetaData`` accordingly. Otherwise, you
will be ``SAD``.
+----------+--------------------------------------------+
| key | value |
+==========+============================================+
| 0 | 2 (fixed) |
+----------+--------------------------------------------+
| 1 | ``--decode-chunk-len`` |
+----------+--------------------------------------------+
| 2 | ``--num-left-chunks`` |
+----------+--------------------------------------------+
| 3 | 7 (if you don't change code) |
+----------+--------------------------------------------+
| 15 | 1 (The model version) |
+----------+--------------------------------------------+
|-23316 | ``--num-encoder-layers`` |
+----------+--------------------------------------------+
|-23317 | ``--encoder-dims`` |
+----------+--------------------------------------------+
|-23318 | ``--attention-dims`` |
+----------+--------------------------------------------+
|-23319 | ``--zipformer-downsampling-factors`` |
+----------+--------------------------------------------+
|-23320 | ``--cnn-module-kernels`` |
+----------+--------------------------------------------+
4. ``Input in0 0 1 in0``. No need to change it.
.. caution::
When you add a new layer ``SherpaMetaData``, please remember to update the
number of layers. In our case, update ``2028`` to ``2029``. Otherwise,
you will be SAD later.
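This edit can also be scripted; a minimal sketch (assuming GNU ``sed``, a
single line starting with ``Input``, and the exact counts shown above; adjust
the ``SherpaMetaData`` values to your own configuration):

.. code-block:: bash

   cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/

   # Bump the layer count on the second line: 2028 -> 2029
   sed -i '2s/^2028/2029/' encoder_jit_trace-pnnx.ncnn.param

   # Insert the SherpaMetaData line right before the Input layer
   sed -i '/^Input/i SherpaMetaData sherpa_meta_data1 0 0 0=2 1=32 2=4 3=7 15=1 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31' encoder_jit_trace-pnnx.ncnn.param

   # Verify the first three lines
   head -n 3 encoder_jit_trace-pnnx.ncnn.param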
.. hint::
After adding the new layer ``SherpaMetaData``, you cannot use this model
with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
supported only in `sherpa-ncnn`_.
.. hint::
`ncnn`_ is very flexible. You can add new layers to it just by text-editing
the ``param`` file! You don't need to change the ``bin`` file.
Now you can use this model in `sherpa-ncnn`_.
Please refer to the following documentation:
- Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
- ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
- ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
- Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
- `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
You can find more usages there.
View File
@ -1,12 +1,37 @@
Export to ncnn
==============
We support exporting the following models
to `ncnn <https://github.com/tencent/ncnn>`_:

- `Zipformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_

- `LSTM transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2>`_

- `ConvEmformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conv_emformer_transducer_stateless2>`_

We also provide `sherpa-ncnn`_
for performing speech recognition using `ncnn`_ with exported models.
It has been tested on the following platforms:

- Linux
- macOS
- Windows
- ``Android``
- ``iOS``
- ``Raspberry Pi``
- `爱芯派 <https://wiki.sipeed.com/hardware/zh/>`_ (`MAIX-III AXera-Pi <https://wiki.sipeed.com/hardware/en/maixIII/ax-pi/axpi.html>`_).
- `RV1126 <https://www.rock-chips.com/a/en/products/RV11_Series/2020/0427/1076.html>`_

`sherpa-ncnn`_ is self-contained and can be statically linked to produce
a binary containing everything needed. Please refer
to its documentation for details:

- `<https://k2-fsa.github.io/sherpa/ncnn/index.html>`_

.. toctree::

   export-ncnn-zipformer
   export-ncnn-conv-emformer
   export-ncnn-lstm

View File

@ -1,69 +1,104 @@
Export to ONNX
==============
In this section, we describe how to export models to ONNX.
In this section, we describe how to export models to `ONNX`_.
.. hint::
Only non-streaming conformer transducer models are tested.
Before you continue, please run:
.. code-block:: bash
pip install onnx
When to use it
--------------
In each recipe, there is a file called ``export-onnx.py``, which is used
to export trained models to `ONNX`_.
If you want to use an inference framework that supports ONNX
to run the pretrained model.
There is also a file named ``onnx_pretrained.py``, which you can use
to run the exported `ONNX`_ model in Python with `onnxruntime`_ and decode sound files.
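As a quick sanity check after export, you can also inspect an exported model
directly with `onnxruntime`_. Below is a minimal sketch; the file name is a
placeholder:
.. code-block:: python
# List the inputs/outputs of an exported encoder and print any metadata
# the export script attached to the model.
import onnxruntime

sess = onnxruntime.InferenceSession("encoder-epoch-99-avg-1.onnx")

for i in sess.get_inputs():
    print("input:", i.name, i.shape, i.type)
for o in sess.get_outputs():
    print("output:", o.name, o.shape, o.type)

print(sess.get_modelmeta().custom_metadata_map)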
sherpa-onnx
-----------
We have a separate repository `sherpa-onnx`_ for deploying your exported models
on various platforms such as:
- iOS
- Android
- Raspberry Pi
- Linux/macOS/Windows
How to export
-------------
Please see the documentation of `sherpa-onnx`_ for details:
We use
`<https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless3>`_
as an example in the following.
`<https://k2-fsa.github.io/sherpa/onnx/index.html>`_
Example
-------
In the following, we demonstrate how to export a streaming Zipformer pre-trained
model from
`<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_
to `ONNX`_.
Download the pre-trained model
------------------------------
.. hint::
We assume you have installed `git-lfs`_.
.. code-block:: bash
cd egs/librispeech/ASR
epoch=14
avg=2
./pruned_transducer_stateless3/export.py \
--exp-dir ./pruned_transducer_stateless3/exp \
--bpe-model data/lang_bpe_500/bpe.model \
--epoch $epoch \
--avg $avg \
--onnx 1
cd egs/librispeech/ASR
It will generate the following files inside ``pruned_transducer_stateless3/exp``:
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
- ``encoder.onnx``
- ``decoder.onnx``
- ``joiner.onnx``
- ``joiner_encoder_proj.onnx``
- ``joiner_decoder_proj.onnx``
pushd $repo
git lfs pull --include "data/lang_bpe_500/bpe.model"
git lfs pull --include "exp/pretrained.pt"
cd exp
ln -s pretrained.pt epoch-99.pt
popd
You can use ``./pruned_transducer_stateless3/onnx_pretrained.py`` to decode
waves with the generated files:
Export the model to ONNX
------------------------
.. code-block:: bash
./pruned_transducer_stateless3/onnx_pretrained.py \
--bpe-model ./data/lang_bpe_500/bpe.model \
--encoder-model-filename ./pruned_transducer_stateless3/exp/encoder.onnx \
--decoder-model-filename ./pruned_transducer_stateless3/exp/decoder.onnx \
--joiner-model-filename ./pruned_transducer_stateless3/exp/joiner.onnx \
--joiner-encoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_encoder_proj.onnx \
--joiner-decoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_decoder_proj.onnx \
/path/to/foo.wav \
/path/to/bar.wav \
/path/to/baz.wav
./pruned_transducer_stateless7_streaming/export-onnx.py \
--bpe-model $repo/data/lang_bpe_500/bpe.model \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--decode-chunk-len 32 \
--exp-dir $repo/exp/
.. warning::
How to use the exported model
-----------------------------
``export-onnx.py`` from different recipes has different options.
We also provide `<https://github.com/k2-fsa/sherpa-onnx>`_
for performing speech recognition using `onnxruntime <https://github.com/microsoft/onnxruntime>`_
with exported models.
It has been tested on Linux, macOS, and Windows.
In the above example, ``--decode-chunk-len`` is specific to the
streaming Zipformer. Other models won't have such an option.
It will generate the following 3 files in ``$repo/exp``:
- ``encoder-epoch-99-avg-1.onnx``
- ``decoder-epoch-99-avg-1.onnx``
- ``joiner-epoch-99-avg-1.onnx``
Decode sound files with exported ONNX models
--------------------------------------------
.. code-block:: bash
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_500/tokens.txt \
$repo/test_wavs/1089-134686-0001.wav

View File

@ -0,0 +1,223 @@
Distillation with HuBERT
========================
This tutorial shows you how to perform knowledge distillation in `icefall`_
with the `LibriSpeech`_ dataset. The distillation method
used here is called "Multi Vector Quantization Knowledge Distillation" (MVQ-KD).
Please have a look at our paper `Predicting Multi-Codebook Vector Quantization Indexes for Knowledge Distillation <https://arxiv.org/abs/2211.00508>`_
for more details about MVQ-KD.
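As a rough illustration of the idea (a toy sketch under our reading of the
paper, not icefall's actual implementation): the teacher's frame-level
embeddings are quantized into several codebook indexes, and the student is
trained to predict those indexes from an intermediate layer via an auxiliary
cross-entropy loss:
.. code-block:: python
# Toy sketch of an MVQ-KD style auxiliary loss; all sizes are made up.
import torch.nn as nn
import torch.nn.functional as F

num_codebooks, codebook_size, dim = 8, 256, 384
head = nn.Linear(dim, num_codebooks * codebook_size)

def codebook_loss(student_hidden, codebook_indexes):
    # student_hidden: (N, T, dim), activations of an intermediate layer.
    # codebook_indexes: (N, T, num_codebooks), teacher-derived targets.
    logits = head(student_hidden).reshape(-1, codebook_size)
    return F.cross_entropy(logits, codebook_indexes.reshape(-1))

# total loss = transducer loss + codebook_loss_scale * codebook_loss(...)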
.. note::
This tutorial is based on recipe
`pruned_transducer_stateless4 <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless4>`_.
Currently, we only implement MVQ-KD in this recipe. However, MVQ-KD is theoretically applicable to all recipes
with only minor changes needed. Feel free to try out MVQ-KD in different recipes. If you
encounter any problems, please open an issue in `icefall <https://github.com/k2-fsa/icefall/issues>`_.
.. note::
We assume you have read the page :ref:`install icefall` and have setup
the environment for `icefall`_.
.. HINT::
We recommend that you use a GPU or several GPUs to run this recipe.
Data preparation
----------------
We first prepare the necessary training data for `LibriSpeech`_.
This is the same as in :ref:`non_streaming_librispeech_pruned_transducer_stateless`.
.. hint::
The data preparation is the same as in other LibriSpeech recipes.
If you have finished this step, you can skip directly to :ref:`codebook_index_preparation`.
.. code-block:: bash
$ cd egs/librispeech/ASR
$ ./prepare.sh
The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
All you need to do is to run it.
The data preparation contains several stages. You can use the following two
options:
- ``--stage``
- ``--stop-stage``
to control which stage(s) should be run. By default, all stages are executed.
For example,
.. code-block:: bash
$ cd egs/librispeech/ASR
$ ./prepare.sh --stage 0 --stop-stage 0 # run only stage 0
$ ./prepare.sh --stage 2 --stop-stage 5 # run from stage 2 to stage 5
.. HINT::
If you have pre-downloaded the `LibriSpeech`_
dataset and the `musan`_ dataset, say,
they are saved in ``/tmp/LibriSpeech`` and ``/tmp/musan``, you can modify
the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
``./prepare.sh`` won't re-download them.
.. NOTE::
All files generated by ``./prepare.sh``, e.g., features, lexicon, etc.,
are saved in the ``./data`` directory.
We provide the following YouTube video showing how to run ``./prepare.sh``.
.. note::
To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe to
the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
`<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
.. youtube:: ofEIoJL-mGM
.. _codebook_index_preparation:
Codebook index preparation
--------------------------
Here, we prepare necessary data for MVQ-KD. This requires the generation
of codebook indexes (please read our `paper <https://arxiv.org/abs/2211.00508>`_
if you are interested in the details). In this tutorial, we use the pre-computed
codebook indexes for convenience. The only thing you need to do is to
run `./distillation_with_hubert.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/distillation_with_hubert.sh>`_.
.. note::
There are 5 stages in total; the first and second stages will be skipped automatically
if you choose to download the codebook indexes prepared by `icefall`_.
Of course, you can also extract and compute the codebook indexes yourself. This
requires you to download a HuBERT-XL model, and extracting the codebook
indexes can take a while.
As usual, you can control the stages you want to run by specifying the following
two options:
- ``--stage``
- ``--stop-stage``
For example,
.. code-block:: bash
$ cd egs/librispeech/ASR
$ ./distillation_with_hubert.sh --stage 0 --stop-stage 0 # run only stage 0
$ ./distillation_with_hubert.sh --stage 2 --stop-stage 4 # run from stage 2 to stage 4
Here are a few options in `./distillation_with_hubert.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/distillation_with_hubert.sh>`_
you need to know before you proceed.
- ``--full_libri`` If True, use the full 960h of data. Otherwise only ``train-clean-100`` will be used.
- ``--use_extracted_codebook`` If True, the first two stages will be skipped and the codebook
indexes uploaded by us will be downloaded.
Since we are using the pre-computed codebook indexes, we set
``use_extracted_codebook=True``. If you want to do full `LibriSpeech`_
experiments, please set ``full_libri=True``.
The following command downloads the pre-computed codebook indexes
and prepares MVQ-augmented training manifests.
.. code-block:: bash
$ ./distillation_with_hubert.sh --stage 2 --stop-stage 2 # run only stage 2
Please see the
following screenshot for the output of an example execution.
.. figure:: ./images/distillation_codebook.png
:width: 800
:alt: Downloading codebook indexes and preparing training manifest.
:align: center
Downloading codebook indexes and preparing training manifest.
.. hint::
The codebook indexes we prepared for you in this tutorial
are extracted from the 36-th layer of a fine-tuned HuBERT-XL model
with 8 codebooks. If you want to try other configurations, please
set ``use_extracted_codebook=False`` and set ``embedding_layer`` and
``num_codebooks`` by yourself.
Now, you should see the following files under the directory ``./data/vq_fbank_layer36_cb8``.
.. figure:: ./images/distillation_directory.png
:width: 800
:alt: MVQ-augmented training manifests
:align: center
MVQ-augmented training manifests.
Voila! You are ready to perform knowledge distillation training now!
Training
--------
To perform training, please run stage 3 by executing the following command.
.. code-block:: bash
$ ./distillation_with_hubert.sh --stage 3 --stop-stage 3 # run MVQ training
Here is the code snippet for training:
.. code-block:: bash
WORLD_SIZE=$(echo ${CUDA_VISIBLE_DEVICES} | awk '{n=split($1, _, ","); print n}')
./pruned_transducer_stateless6/train.py \
--manifest-dir ./data/vq_fbank_layer36_cb8 \
--master-port 12359 \
--full-libri $full_libri \
--spec-aug-time-warp-factor -1 \
--max-duration 300 \
--world-size ${WORLD_SIZE} \
--num-epochs 30 \
--exp-dir $exp_dir \
--enable-distillation True \
--codebook-loss-scale 0.01
A few arguments in the above training command deserve attention.
- ``--enable-distillation`` If True, knowledge distillation training is enabled.
- ``--codebook-loss-scale`` The scale of the knowledge distillation loss.
- ``--manifest-dir`` The path to the MVQ-augmented manifest.
Decoding
--------
After training finishes, you can test the performance using
the following command.
.. code-block:: bash
export CUDA_VISIBLE_DEVICES=0
./pruned_transducer_stateless6/decode.py \
--decoding-method "modified_beam_search" \
--epoch 30 \
--avg 10 \
--max-duration 200 \
--exp-dir $exp_dir \
--enable-distillation True
You should get similar results as `here <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS-100hours.md#distillation-with-hubert>`_.
That's all! Feel free to experiment with your own setups and report your results.
If you encounter any problems during training, please open up an issue `here <https://github.com/k2-fsa/icefall/issues>`_.

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

View File

@ -9,3 +9,4 @@ LibriSpeech
pruned_transducer_stateless
zipformer_mmi
zipformer_ctc_blankskip
distillation

View File

@ -1,3 +1,5 @@
.. _non_streaming_librispeech_pruned_transducer_stateless:
Pruned transducer statelessX
============================

View File

@ -299,11 +299,11 @@ to run the training part first.
- (1) ``epoch-1.pt``, ``epoch-2.pt``, ..., which are saved at the end
of each epoch. You can pass ``--epoch`` to
``pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py`` to use them.
``pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py`` to use them.
- (2) ``checkpoint-436000.pt``, ``checkpoint-438000.pt``, ..., which are saved
every ``--save-every-n`` batches. You can pass ``--iter`` to
``pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py`` to use them.
``pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py`` to use them.
We suggest that you try both types of checkpoints and choose the one
that produces the lowest WERs.
@ -311,7 +311,7 @@ to run the training part first.
.. code-block:: bash
$ cd egs/librispeech/ASR
$ ./pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py --help
$ ./pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py --help
shows the options for decoding.
@ -320,7 +320,7 @@ The following shows the example using ``epoch-*.pt``:
.. code-block:: bash
for m in greedy_search fast_beam_search modified_beam_search; do
./pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py \
./pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py \
--epoch 30 \
--avg 13 \
--exp-dir pruned_transducer_stateless7_ctc_bs/exp \
@ -333,7 +333,7 @@ To test CTC branch, you can use the following command:
.. code-block:: bash
for m in ctc-decoding 1best; do
./pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py \
./pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py \
--epoch 30 \
--avg 13 \
--exp-dir pruned_transducer_stateless7_ctc_bs/exp \
@ -367,7 +367,7 @@ It will generate a file ``./pruned_transducer_stateless7_ctc_bs/exp/pretrained.p
.. hint::
To use the generated ``pretrained.pt`` for ``pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py``,
To use the generated ``pretrained.pt`` for ``pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py``,
you can run:
.. code-block:: bash
@ -376,7 +376,7 @@ It will generate a file ``./pruned_transducer_stateless7_ctc_bs/exp/pretrained.p
ln -s pretrained.pt epoch-9999.pt
And then pass ``--epoch 9999 --avg 1 --use-averaged-model 0`` to
``./pruned_transducer_stateless7_ctc_bs/ctc_guild_decode_bs.py``.
``./pruned_transducer_stateless7_ctc_bs/ctc_guide_decode_bs.py``.
To use the exported model with ``./pruned_transducer_stateless7_ctc_bs/pretrained.py``, you
can run:
@ -447,7 +447,8 @@ Download pretrained models
If you don't want to train from scratch, you can download the pretrained models
by visiting the following links:
- `<https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2022-12-14>`_
- trained on LibriSpeech 100h: `<https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2022-12-14>`_
- trained on LibriSpeech 960h: `<https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29>`_
See `<https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md>`_
for the details of the above pretrained models.

View File

@ -30,8 +30,9 @@ In icefall, we implement the streaming conformer the way just like what `WeNet <
See :doc:`Pruned transducer statelessX <librispeech/pruned_transducer_stateless>` for more details.
.. HINT::
If you want to adapt a non-streaming conformer model to be streaming, please refer
to `this pull request <https://github.com/k2-fsa/icefall/pull/454>`_.
If you want to modify a non-streaming conformer recipe to support both streaming and non-streaming, please refer
to `this pull request <https://github.com/k2-fsa/icefall/pull/454>`_. After adding the code needed for streaming training,
you have to re-train it with the extra arguments mentioned in the docs above to get a streaming model.
Streaming Emformer

View File

@ -515,133 +515,6 @@ To use the generated files with ``./lstm_transducer_stateless2/jit_pretrained``:
Please see `<https://k2-fsa.github.io/sherpa/python/streaming_asr/lstm/english/server.html>`_
for how to use the exported models in ``sherpa``.
.. _export-model-for-ncnn:
Export model for ncnn
~~~~~~~~~~~~~~~~~~~~~
We support exporting pretrained LSTM transducer models to
`ncnn <https://github.com/tencent/ncnn>`_ using
`pnnx <https://github.com/Tencent/ncnn/tree/master/tools/pnnx>`_.
First, let us install a modified version of ``ncnn``:
.. code-block:: bash
git clone https://github.com/csukuangfj/ncnn
cd ncnn
git submodule update --recursive --init
# Note: We don't use "python setup.py install" or "pip install ." here
mkdir -p build-wheel
cd build-wheel
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DNCNN_PYTHON=ON \
-DNCNN_BUILD_BENCHMARK=OFF \
-DNCNN_BUILD_EXAMPLES=OFF \
-DNCNN_BUILD_TOOLS=ON \
..
make -j4
cd ..
# Note: $PWD here is /path/to/ncnn
export PYTHONPATH=$PWD/python:$PYTHONPATH
export PATH=$PWD/tools/pnnx/build/src:$PATH
export PATH=$PWD/build-wheel/tools/quantize:$PATH
# now build pnnx
cd tools/pnnx
mkdir build
cd build
cmake ..
make -j4
./src/pnnx
.. note::
We assume that you have added the path to the binary ``pnnx`` to the
environment variable ``PATH``.
We also assume that you have added ``build/tools/quantize`` to the environment
variable ``PATH`` so that you are able to use ``ncnn2int8`` later.
Second, let us export the model using ``torch.jit.trace()`` that is suitable
for ``pnnx``:
.. code-block:: bash
iter=468000
avg=16
./lstm_transducer_stateless2/export.py \
--exp-dir ./lstm_transducer_stateless2/exp \
--bpe-model data/lang_bpe_500/bpe.model \
--iter $iter \
--avg $avg \
--pnnx 1
It will generate 3 files:
- ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt``
- ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt``
- ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt``
Third, convert torchscript model to ``ncnn`` format:
.. code-block::
pnnx ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.pt
pnnx ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.pt
pnnx ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.pt
It will generate the following files:
- ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param``
- ``./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin``
- ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param``
- ``./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin``
- ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param``
- ``./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin``
To use the above generated files, run:
.. code-block:: bash
./lstm_transducer_stateless2/ncnn-decode.py \
--bpe-model-filename ./data/lang_bpe_500/bpe.model \
--encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin \
/path/to/foo.wav
.. code-block:: bash
./lstm_transducer_stateless2/streaming-ncnn-decode.py \
--bpe-model-filename ./data/lang_bpe_500/bpe.model \
--encoder-param-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.param \
--encoder-bin-filename ./lstm_transducer_stateless2/exp/encoder_jit_trace-pnnx.ncnn.bin \
--decoder-param-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.param \
--decoder-bin-filename ./lstm_transducer_stateless2/exp/decoder_jit_trace-pnnx.ncnn.bin \
--joiner-param-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.param \
--joiner-bin-filename ./lstm_transducer_stateless2/exp/joiner_jit_trace-pnnx.ncnn.bin \
/path/to/foo.wav
To use the above generated files in C++, please see
`<https://github.com/k2-fsa/sherpa-ncnn>`_
It is able to generate a statically linked executable that can be run on Linux, Windows,
macOS, Raspberry Pi, etc., without external dependencies.
Download pretrained models
--------------------------

View File

@ -391,18 +391,14 @@ def save_results(
):
test_set_wers = dict()
for key, results in results_dict.items():
recog_path = (
params.res_dir / f"recogs-{test_set_name}-{key}-{params.suffix}.txt"
)
recog_path = params.res_dir / f"recogs-{test_set_name}-{params.suffix}.txt"
results = sorted(results)
store_transcripts(filename=recog_path, texts=results)
logging.info(f"The transcripts are stored in {recog_path}")
# The following prints out WERs, per-word error statistics and aligned
# ref/hyp pairs.
errs_filename = (
params.res_dir / f"errs-{test_set_name}-{key}-{params.suffix}.txt"
)
errs_filename = params.res_dir / f"errs-{test_set_name}-{params.suffix}.txt"
with open(errs_filename, "w") as f:
wer = write_error_stats(
f, f"{test_set_name}-{key}", results, enable_log=True
@ -412,9 +408,7 @@ def save_results(
logging.info("Wrote detailed error stats to {}".format(errs_filename))
test_set_wers = sorted(test_set_wers.items(), key=lambda x: x[1])
errs_info = (
params.res_dir / f"wer-summary-{test_set_name}-{key}-{params.suffix}.txt"
)
errs_info = params.res_dir / f"wer-summary-{test_set_name}-{params.suffix}.txt"
with open(errs_info, "w") as f:
print("settings\tWER", file=f)
for key, val in test_set_wers:

View File

@ -1,7 +1,7 @@
# Introduction
Please refer to <https://icefall.readthedocs.io/en/latest/recipes/aishell/index.html>
Please refer to <https://icefall.readthedocs.io/en/latest/recipes/Non-streaming-ASR/aishell/index.html>
for how to run models in this recipe.
@ -17,6 +17,7 @@ The following table lists the differences among them.
| `transducer_stateless_modified` | Conformer | Embedding + Conv1d | with modified transducer from `optimized_transducer` |
| `transducer_stateless_modified-2` | Conformer | Embedding + Conv1d | with modified transducer from `optimized_transducer` + extra data |
| `pruned_transducer_stateless3` | Conformer (reworked) | Embedding + Conv1d | pruned RNN-T + reworked model with random combiner + using aidatatang_20zh as extra data|
| `pruned_transducer_stateless7` | Zipformer | Embedding | pruned RNN-T + zipformer encoder + stateless decoder with context-size 1 |
The decoder in `transducer_stateless` is modified from the paper
[Rnn-Transducer with Stateless Prediction Network](https://ieeexplore.ieee.org/document/9054419/).
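For a rough feel of what "stateless" means here (an illustrative sketch, not
the recipes' actual code): the prediction network is just an embedding over the
last ``context-size`` output tokens followed by a 1-D convolution, with no
recurrent state:

```python
import torch
import torch.nn as nn

class ToyStatelessDecoder(nn.Module):
    # Illustrative only; dimensions and names are made up.
    def __init__(self, vocab_size=500, dim=512, context_size=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, dim)
        self.conv = nn.Conv1d(dim, dim, kernel_size=context_size)

    def forward(self, y):
        # y: (N, context_size), the most recent output tokens.
        emb = self.embedding(y).permute(0, 2, 1)  # (N, dim, context_size)
        out = self.conv(emb)                      # (N, dim, 1)
        return out.permute(0, 2, 1)               # (N, 1, dim)
```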

View File

@ -2,6 +2,109 @@
### Aishell training result (Stateless Transducer)
#### Pruned transducer stateless 7
[./pruned_transducer_stateless7](./pruned_transducer_stateless7)
It uses Zipformer with pruned RNN-T loss.
| | test | dev | comment |
|------------------------|------|------|---------------------------------------|
| greedy search | 5.02 | 4.61 | --epoch 42 --avg 6 --max-duration 600 |
| modified beam search | 4.81 | 4.4 | --epoch 42 --avg 6 --max-duration 600 |
| fast beam search | 4.91 | 4.52 | --epoch 42 --avg 6 --max-duration 600 |
Training command is:
```bash
./prepare.sh
export CUDA_VISIBLE_DEVICES="0,1"
./pruned_transducer_stateless7/train.py \
--world-size 2 \
--num-epochs 50 \
--start-epoch 1 \
--use-fp16 1 \
--exp-dir pruned_transducer_stateless7/exp \
--context-size 1 \
--max-duration 300
```
**Caution**: It uses `--context-size=1`.
The tensorboard log is available at
<https://tensorboard.dev/experiment/MHYo3ApfQxaCdYLr38cQOQ>
The decoding command is:
```bash
for m in greedy_search modified_beam_search fast_beam_search ; do
./pruned_transducer_stateless7/decode.py \
--epoch 42 \
--avg 6 \
--exp-dir ./pruned_transducer_stateless7/exp \
--lang-dir data/lang_char \
--max-duration 300 \
--context-size 1 \
--decoding-method $m
done
```
Pretrained models, training logs, decoding logs, and decoding results
are available at
<https://huggingface.co/marcoyang/icefall-asr-aishell-zipformer-pruned-transducer-stateless7-2023-03-21>
#### Pruned transducer stateless 7 (zipformer)
See <https://github.com/k2-fsa/icefall/pull/986>
[./pruned_transducer_stateless7_bbpe](./pruned_transducer_stateless7_bbpe)
**Note**: The modeling units are byte-level BPEs.
The best results I have gotten are:
Vocab size | Greedy search (dev & test) | Modified beam search (dev & test) | Fast beam search (dev & test) | Fast beam search LG (dev & test) | comments
-- | -- | -- | -- | -- | --
500 | 4.31 & 4.59 | 4.25 & 4.54 | 4.27 & 4.55 | 4.07 & 4.38 | --epoch 48 --avg 29
The training command:
```
export CUDA_VISIBLE_DEVICES="4,5,6,7"
./pruned_transducer_stateless7_bbpe/train.py \
--world-size 4 \
--num-epochs 50 \
--start-epoch 1 \
--use-fp16 1 \
--max-duration 800 \
--bpe-model data/lang_bbpe_500/bbpe.model \
--exp-dir pruned_transducer_stateless7_bbpe/exp \
--lr-epochs 6 \
--master-port 12535
```
The decoding command:
```
for m in greedy_search modified_beam_search fast_beam_search fast_beam_search_LG; do
./pruned_transducer_stateless7_bbpe/decode.py \
--epoch 48 \
--avg 29 \
--exp-dir ./pruned_transducer_stateless7_bbpe/exp \
--max-sym-per-frame 1 \
--ngram-lm-scale 0.25 \
--ilme-scale 0.2 \
--bpe-model data/lang_bbpe_500/bbpe.model \
--max-duration 2000 \
--decoding-method $m
done
```
The pretrained model is available at: https://huggingface.co/pkufool/icefall_asr_aishell_pruned_transducer_stateless7_bbpe
#### Pruned transducer stateless 3
See <https://github.com/k2-fsa/icefall/pull/436>
@ -15,6 +118,8 @@ It uses pruned RNN-T.
|------------------------|------|------|---------------------------------------|
| greedy search | 5.39 | 5.09 | --epoch 29 --avg 5 --max-duration 600 |
| modified beam search | 5.05 | 4.79 | --epoch 29 --avg 5 --max-duration 600 |
| modified beam search + RNNLM shallow fusion | 4.73 | 4.53 | --epoch 29 --avg 5 --max-duration 600 |
| modified beam search + LODR | 4.57 | 4.37 | --epoch 29 --avg 5 --max-duration 600 |
| fast beam search | 5.13 | 4.91 | --epoch 29 --avg 5 --max-duration 600 |
Training command is:
@ -73,6 +178,78 @@ for epoch in 29; do
done
```
We provide the option of shallow fusion with an RNN language model. The pre-trained language model is
available at <https://huggingface.co/marcoyang/icefall-aishell-rnn-lm>. To decode with the language model,
please use the following command:
```bash
# download pre-trained model
git lfs install
git clone https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20
aishell_exp=icefall-aishell-pruned-transducer-stateless3-2022-06-20/
pushd ${aishell_exp}/exp
ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt epoch-99.pt
popd
# download RNN LM
git lfs install
git clone https://huggingface.co/marcoyang/icefall-aishell-rnn-lm
rnnlm_dir=icefall-aishell-rnn-lm
# RNNLM shallow fusion
for lm_scale in $(seq 0.26 0.02 0.34); do
python ./pruned_transducer_stateless3/decode.py \
--epoch 99 \
--avg 1 \
--lang-dir ${aishell_exp}/data/lang_char \
--exp-dir ${aishell_exp}/exp \
--use-averaged-model False \
--decoding-method modified_beam_search_lm_shallow_fusion \
--use-shallow-fusion 1 \
--lm-type rnn \
--lm-exp-dir ${rnnlm_dir}/exp \
--lm-epoch 99 \
--lm-scale $lm_scale \
--lm-avg 1 \
--rnn-lm-embedding-dim 2048 \
--rnn-lm-hidden-dim 2048 \
--rnn-lm-num-layers 2 \
--lm-vocab-size 4336
done
# RNNLM Low-order density ratio (LODR) with a 2-gram
cp ${rnnlm_dir}/2gram.fst.txt ${aishell_exp}/data/lang_char/2gram.fst.txt
for lm_scale in 0.48; do
for LODR_scale in -0.28; do
python ./pruned_transducer_stateless3/decode.py \
--epoch 99 \
--avg 1 \
--lang-dir ${aishell_exp}/data/lang_char \
--exp-dir ${aishell_exp}/exp \
--use-averaged-model False \
--decoding-method modified_beam_search_LODR \
--use-shallow-fusion 1 \
--lm-type rnn \
--lm-exp-dir ${rnnlm_dir}/exp \
--lm-epoch 99 \
--lm-scale $lm_scale \
--lm-avg 1 \
--rnn-lm-embedding-dim 2048 \
--rnn-lm-hidden-dim 2048 \
--rnn-lm-num-layers 2 \
--lm-vocab-size 4336 \
--tokens-ngram 2 \
--backoff-id 4336 \
--ngram-lm-scale $LODR_scale
done
done
```
Pretrained models, training logs, decoding logs, and decoding results
are available at
<https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20>

View File

@ -0,0 +1 @@
../../../librispeech/ASR/local/compile_lg.py

View File

@ -33,6 +33,7 @@ and generates the following files in the directory `lang_dir`:
- tokens.txt
"""
import argparse
import re
from pathlib import Path
from typing import Dict, List
@ -189,8 +190,22 @@ def generate_tokens(text_file: str) -> Dict[str, int]:
return tokens
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--lang-dir",
type=str,
help="""Input and output directory.
It should contain the bpe.model and words.txt
""",
)
return parser.parse_args()
def main():
lang_dir = Path("data/lang_char")
args = get_args()
lang_dir = Path(args.lang_dir)
text_file = lang_dir / "text"
word_sym_table = k2.SymbolTable.from_file(lang_dir / "words.txt")

View File

@ -0,0 +1,164 @@
#!/usr/bin/env python3
# Copyright (c) 2021 Xiaomi Corporation (authors: Daniel Povey
# Fangjun Kuang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script takes a `tokens.txt` and a text file such as
./download/lm/aishell-transcript.txt
and outputs the LM training data to a supplied directory such
as data/lm_training_char. The format is as follows:
It creates a PyTorch archive (.pt file), say data/lm_training.pt, which is a
representation of a dict with the same format as in the librispeech recipe.
"""
import argparse
import logging
from pathlib import Path
import k2
import torch
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--lang-char",
type=str,
help="""Lang dir of asr model, e.g. data/lang_char""",
)
parser.add_argument(
"--lm-data",
type=str,
help="""Input LM training data as text, e.g.
download/lm/aishell-train-word.txt""",
)
parser.add_argument(
"--lm-archive",
type=str,
help="""Path to output archive, e.g. data/lm_training_char/lm_data.pt;
look at the source of this script to see the format.""",
)
return parser.parse_args()
def main():
args = get_args()
if Path(args.lm_archive).exists():
logging.warning(f"{args.lm_archive} exists - skipping")
return
# make token_dict from tokens.txt in order to map characters to tokens.
token_dict = {}
token_file = args.lang_char + "/tokens.txt"
with open(token_file, "r") as f:
for line in f.readlines():
line_list = line.split()
token_dict[line_list[0]] = int(line_list[1])
# word2index is a dictionary from words to integer ids. No need to reserve
# space for epsilon, etc.; the words are just used as a convenient way to
# compress the sequences of tokens.
word2index = dict()
word2token = [] # Will be a list-of-list-of-int, representing tokens.
sentences = [] # Will be a list-of-list-of-int, representing word-ids.
if "aishell-lm" in args.lm_data:
num_lines_in_total = 120098.0
step = 50000
elif "valid" in args.lm_data:
num_lines_in_total = 14326.0
step = 3000
elif "test" in args.lm_data:
num_lines_in_total = 7176.0
step = 3000
else:
num_lines_in_total = None
step = None
processed = 0
with open(args.lm_data) as f:
while True:
line = f.readline()
if line == "":
break
if step and processed % step == 0:
logging.info(
f"Processed number of lines: {processed} "
f"({processed / num_lines_in_total * 100: .3f}%)"
)
processed += 1
line_words = line.split()
for w in line_words:
if w not in word2index:
w_token = []
for t in w:
if t in token_dict:
w_token.append(token_dict[t])
else:
w_token.append(token_dict["<unk>"])
word2index[w] = len(word2token)
word2token.append(w_token)
sentences.append([word2index[w] for w in line_words])
logging.info("Constructing ragged tensors")
words = k2.ragged.RaggedTensor(word2token)
sentences = k2.ragged.RaggedTensor(sentences)
output = dict(words=words, sentences=sentences)
num_sentences = sentences.dim0
logging.info(f"Computing sentence lengths, num_sentences: {num_sentences}")
sentence_lengths = [0] * num_sentences
for i in range(num_sentences):
if step and i % step == 0:
logging.info(
f"Processed number of lines: {i} ({i / num_sentences * 100: .3f}%)"
)
word_ids = sentences[i]
# NOTE: If word_ids is a tensor with only 1 entry,
# token_ids is a torch.Tensor
token_ids = words[word_ids]
if isinstance(token_ids, k2.RaggedTensor):
token_ids = token_ids.values
# token_ids is a 1-D tensor containing the BPE tokens
# of the current sentence
sentence_lengths[i] = token_ids.numel()
output["sentence_lengths"] = torch.tensor(sentence_lengths, dtype=torch.int32)
torch.save(output, args.lm_archive)
logging.info(f"Saved to {args.lm_archive}")
if __name__ == "__main__":
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
logging.basicConfig(format=formatter, level=logging.INFO)
main()

View File

@ -0,0 +1,267 @@
#!/usr/bin/env python3
# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang
# Wei Kang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script takes as input `lang_dir`, which should contain::
- lang_dir/bbpe.model,
- lang_dir/words.txt
and generates the following files in the directory `lang_dir`:
- lexicon.txt
- lexicon_disambig.txt
- L.pt
- L_disambig.pt
- tokens.txt
"""
import argparse
from pathlib import Path
from typing import Dict, List, Tuple
import k2
import sentencepiece as spm
import torch
from prepare_lang import (
Lexicon,
add_disambig_symbols,
add_self_loops,
write_lexicon,
write_mapping,
)
from icefall.byte_utils import byte_encode
from icefall.utils import str2bool, tokenize_by_CJK_char
def lexicon_to_fst_no_sil(
lexicon: Lexicon,
token2id: Dict[str, int],
word2id: Dict[str, int],
need_self_loops: bool = False,
) -> k2.Fsa:
"""Convert a lexicon to an FST (in k2 format).
Args:
lexicon:
The input lexicon. See also :func:`read_lexicon`
token2id:
A dict mapping tokens to IDs.
word2id:
A dict mapping words to IDs.
need_self_loops:
If True, add self-loop to states with non-epsilon output symbols
on at least one arc out of the state. The input label for this
self loop is `token2id["#0"]` and the output label is `word2id["#0"]`.
Returns:
Return an instance of `k2.Fsa` representing the given lexicon.
"""
loop_state = 0 # words enter and leave from here
next_state = 1 # the next un-allocated state, will be incremented as we go
arcs = []
# The blank symbol <blk> is defined in local/train_bpe_model.py
assert token2id["<blk>"] == 0
assert word2id["<eps>"] == 0
eps = 0
for word, pieces in lexicon:
assert len(pieces) > 0, f"{word} has no pronunciations"
cur_state = loop_state
word = word2id[word]
pieces = [token2id[i] for i in pieces]
for i in range(len(pieces) - 1):
w = word if i == 0 else eps
arcs.append([cur_state, next_state, pieces[i], w, 0])
cur_state = next_state
next_state += 1
# now for the last piece of this word
i = len(pieces) - 1
w = word if i == 0 else eps
arcs.append([cur_state, loop_state, pieces[i], w, 0])
if need_self_loops:
disambig_token = token2id["#0"]
disambig_word = word2id["#0"]
arcs = add_self_loops(
arcs,
disambig_token=disambig_token,
disambig_word=disambig_word,
)
final_state = next_state
arcs.append([loop_state, final_state, -1, -1, 0])
arcs.append([final_state])
arcs = sorted(arcs, key=lambda arc: arc[0])
arcs = [[str(i) for i in arc] for arc in arcs]
arcs = [" ".join(arc) for arc in arcs]
arcs = "\n".join(arcs)
fsa = k2.Fsa.from_str(arcs, acceptor=False)
return fsa
def generate_lexicon(
model_file: str, words: List[str], oov: str
) -> Tuple[Lexicon, Dict[str, int]]:
"""Generate a lexicon from a BPE model.
Args:
model_file:
Path to a sentencepiece model.
words:
A list of strings representing words.
oov:
The out of vocabulary word in lexicon.
Returns:
Return a tuple with two elements:
- A lexicon, i.e., a list of ``(word, pieces)`` pairs.
- A dict representing the token symbol table, mapping from tokens to IDs.
"""
sp = spm.SentencePieceProcessor()
sp.load(str(model_file))
# Convert word to word piece IDs instead of word piece strings
# to avoid OOV tokens.
encode_words = [byte_encode(tokenize_by_CJK_char(w)) for w in words]
words_pieces_ids: List[List[int]] = sp.encode(encode_words, out_type=int)
# Now convert word piece IDs back to word piece strings.
words_pieces: List[List[str]] = [sp.id_to_piece(ids) for ids in words_pieces_ids]
lexicon = []
for word, pieces in zip(words, words_pieces):
lexicon.append((word, pieces))
lexicon.append((oov, ["▁", sp.id_to_piece(sp.unk_id())]))
token2id: Dict[str, int] = {sp.id_to_piece(i): i for i in range(sp.vocab_size())}
return lexicon, token2id
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--lang-dir",
type=str,
help="""Input and output directory.
It should contain the bpe.model and words.txt
""",
)
parser.add_argument(
"--oov",
type=str,
default="<UNK>",
help="The out of vocabulary word in lexicon.",
)
parser.add_argument(
"--debug",
type=str2bool,
default=False,
help="""True for debugging, which will generate
a visualization of the lexicon FST.
Caution: If your lexicon contains hundreds of thousands
of lines, please set it to False!
See "test/test_bpe_lexicon.py" for usage.
""",
)
return parser.parse_args()
def main():
args = get_args()
lang_dir = Path(args.lang_dir)
model_file = lang_dir / "bbpe.model"
word_sym_table = k2.SymbolTable.from_file(lang_dir / "words.txt")
words = word_sym_table.symbols
excluded = ["<eps>", "!SIL", "<SPOKEN_NOISE>", args.oov, "#0", "<s>", "</s>"]
for w in excluded:
if w in words:
words.remove(w)
lexicon, token_sym_table = generate_lexicon(model_file, words, args.oov)
lexicon_disambig, max_disambig = add_disambig_symbols(lexicon)
next_token_id = max(token_sym_table.values()) + 1
for i in range(max_disambig + 1):
disambig = f"#{i}"
assert disambig not in token_sym_table
token_sym_table[disambig] = next_token_id
next_token_id += 1
word_sym_table.add("#0")
word_sym_table.add("<s>")
word_sym_table.add("</s>")
write_mapping(lang_dir / "tokens.txt", token_sym_table)
write_lexicon(lang_dir / "lexicon.txt", lexicon)
write_lexicon(lang_dir / "lexicon_disambig.txt", lexicon_disambig)
L = lexicon_to_fst_no_sil(
lexicon,
token2id=token_sym_table,
word2id=word_sym_table,
)
L_disambig = lexicon_to_fst_no_sil(
lexicon_disambig,
token2id=token_sym_table,
word2id=word_sym_table,
need_self_loops=True,
)
torch.save(L.as_dict(), lang_dir / "L.pt")
torch.save(L_disambig.as_dict(), lang_dir / "L_disambig.pt")
if args.debug:
labels_sym = k2.SymbolTable.from_file(lang_dir / "tokens.txt")
aux_labels_sym = k2.SymbolTable.from_file(lang_dir / "words.txt")
L.labels_sym = labels_sym
L.aux_labels_sym = aux_labels_sym
L.draw(f"{lang_dir / 'L.svg'}", title="L.pt")
L_disambig.labels_sym = labels_sym
L_disambig.aux_labels_sym = aux_labels_sym
L_disambig.draw(f"{lang_dir / 'L_disambig.svg'}", title="L_disambig.pt")
if __name__ == "__main__":
main()

View File

@ -0,0 +1 @@
../../../librispeech/ASR/local/sort_lm_training_data.py

Some files were not shown because too many files have changed in this diff.