Merge branch 'master' into wenetspeech

2025-12-11 06:55:27 +00:00 · 2023-05-08 08:38:01 +08:00 · 2023-05-08 08:38:01 +08:00 · 47565959d9
commit 47565959d9
parent 903ef3b161 efbb577b88
1090 changed files with 123314 additions and 11711 deletions
--- a/.flake8
+++ b/.flake8
@ -1,7 +1,7 @@
 [flake8]
 show-source=true
 statistics=true
-max-line-length = 80
+max-line-length = 88
 per-file-ignores =
    # line too long
    icefall/diagnostics.py: E501,
@ -11,7 +11,8 @@ per-file-ignores =
    egs/*/ASR/*/scaling.py: E501,
    egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
    egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
-    egs/librispeech/ASR/conformer_ctc2/*py: E501,
+    egs/librispeech/ASR/conformer_ctc*/*py: E501,
    egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
    egs/librispeech/ASR/RESULTS.md: E999,
    # invalid escape sequence (caused by TeX formulas), W605
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@ -0,0 +1,3 @@
 # Migrate to 88 characters per line (see: https://github.com/lhotse-speech/lhotse/issues/890)
 107df3b115a58f1b68a6458c3f94a130004be34c
 d31db010371a4128856480382876acdc0d1739ed
--- a/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
+++ b/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
@ -15,5 +15,5 @@ mkdir -p data
 cd data
 [ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
 cd ..
-./local/compute_fbank_librispeech.py
+./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
 ls -lh data/fbank/
--- a/.github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh
+++ b/.github/scripts/run-aishell-pruned-transducer-stateless3-2022-06-20.sh
@ -25,7 +25,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
--- a/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh
+++ b/.github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh
@ -0,0 +1,122 @@
 #!/usr/bin/env bash
 set -e
 # Print a timestamped message tagged with the caller's location.
 # Arguments: the message words, joined with "$*"; `echo -e` interprets any
 #            backslash escapes they contain.
 # Outputs:   "<date> <time> (<file>:<line>:<function>) <message>" on stdout.
 log() {
  # Adapted from espnet.
  local caller_path=${BASH_SOURCE[1]}
  local caller_file=${caller_path##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local origin="${caller_file}:${caller_line}:${caller_func}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
 }
 # Every relative path below is resolved from the LibriSpeech ASR recipe dir.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27
 log "Downloading pre-trained model from $repo_url"
 # GIT_LFS_SKIP_SMUDGE=1 clones without downloading LFS objects; only the
 # files named in the `git lfs pull` calls below are fetched.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 # The clone directory is the last path component of the URL.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 # NOTE(review): these pulls run from inside exp/ while the --include patterns
 # are spelled relative to the repository root; confirm git-lfs resolves them
 # against the root.
 git lfs pull --include "data/lang_bpe_500/HLG.pt"
 git lfs pull --include "data/lang_bpe_500/L.pt"
 git lfs pull --include "data/lang_bpe_500/LG.pt"
 git lfs pull --include "data/lang_bpe_500/Linv.pt"
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "data/lm/G_4_gram.pt"
 git lfs pull --include "exp/jit_trace.pt"
 git lfs pull --include "exp/pretrained.pt"
 # Alias the checkpoint as epoch-99.pt; export.py is later run with --epoch 99.
 ln -s pretrained.pt epoch-99.pt
 ls -lh *.pt
 popd
 # Run the model on the three bundled utterances with both decoding methods:
 # via jit_pretrained.py before and after running export.py with --jit-trace 1,
 # and finally via pretrained.py on the raw checkpoint.
 lang_dir=$repo/data/lang_bpe_500
 test_wavs=(
   $repo/test_wavs/1089-134686-0001.wav
   $repo/test_wavs/1221-135766-0001.wav
   $repo/test_wavs/1221-135766-0002.wav
 )
 log "Decode with models exported by torch.jit.trace()"
 for decoding_method in ctc-decoding 1best; do
  ./conformer_ctc3/jit_pretrained.py \
    --model-filename $repo/exp/jit_trace.pt \
    --words-file $lang_dir/words.txt \
    --HLG $lang_dir/HLG.pt \
    --bpe-model $lang_dir/bpe.model \
    --G $repo/data/lm/G_4_gram.pt \
    --method $decoding_method \
    --sample-rate 16000 \
    "${test_wavs[@]}"
 done
 log "Export to torchscript model"
 ./conformer_ctc3/export.py \
  --exp-dir $repo/exp \
  --lang-dir $lang_dir \
  --jit-trace 1 \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0
 ls -lh $repo/exp/*.pt
 log "Decode with models exported by torch.jit.trace()"
 for decoding_method in ctc-decoding 1best; do
  ./conformer_ctc3/jit_pretrained.py \
    --model-filename $repo/exp/jit_trace.pt \
    --words-file $lang_dir/words.txt \
    --HLG $lang_dir/HLG.pt \
    --bpe-model $lang_dir/bpe.model \
    --G $repo/data/lm/G_4_gram.pt \
    --method $decoding_method \
    --sample-rate 16000 \
    "${test_wavs[@]}"
 done
 # Same utterances, decoded straight from the checkpoint.
 for decoding_method in ctc-decoding 1best; do
  ./conformer_ctc3/pretrained.py \
    --checkpoint $repo/exp/pretrained.pt \
    --words-file $lang_dir/words.txt \
    --HLG $lang_dir/HLG.pt \
    --bpe-model $lang_dir/bpe.model \
    --G $repo/data/lm/G_4_gram.pt \
    --method $decoding_method \
    --sample-rate 16000 \
    "${test_wavs[@]}"
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # The test-clean/test-other decoding below only runs when the event name is
 # "schedule" or the event label is "run-decode".
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode"  ]]; then
  mkdir -p conformer_ctc3/exp
  # Quote the link targets: $PWD is not under this script's control and an
  # unquoted expansion would be word-split if the path contained whitespace.
  ln -s "$PWD/$repo/exp/pretrained.pt" conformer_ctc3/exp/epoch-999.pt
  ln -s "$PWD/$repo/data/lang_bpe_500" data/
  ls -lh data
  ls -lh conformer_ctc3/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  for method in ctc-decoding 1best; do
    log "Decoding with $method"
    ./conformer_ctc3/decode.py \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --exp-dir conformer_ctc3/exp/ \
      --max-duration "$max_duration" \
      --decoding-method "$method" \
      --lm-dir data/lm
  done
  # Remove the checkpoint link created above from the recipe's exp directory.
  rm conformer_ctc3/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
+++ b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
@ -0,0 +1,191 @@
 #!/usr/bin/env bash
 #
 set -e
 # Print a timestamped message tagged with the caller's location.
 # Arguments: the message words, joined with "$*"; `echo -e` interprets any
 #            backslash escapes they contain.
 # Outputs:   "<date> <time> (<file>:<line>:<function>) <message>" on stdout.
 log() {
  # Adapted from espnet.
  local caller_path=${BASH_SOURCE[1]}
  local caller_file=${caller_path##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local origin="${caller_file}:${caller_line}:${caller_func}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
 }
 # Every relative path below is resolved from the LibriSpeech ASR recipe dir.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 # The clone directory is the last path component of the URL.
 repo=$(basename $repo_url)
 # Absolute form of the clone path; the LODR section uses it after pushd-ing
 # into a different repository.
 abs_repo=$(realpath $repo)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 # Alias the released checkpoint under the two names used later:
 # pretrained.pt (--checkpoint) and epoch-99.pt (export.py --epoch 99).
 ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
 ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
 popd
 # Export with --jit-trace 1, then decode the three bundled utterances with
 # the encoder/decoder/joiner files found under $repo/exp.
 log "Test exporting with torch.jit.trace()"
 ./lstm_transducer_stateless2/export.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --jit-trace 1
 log "Decode with models exported by torch.jit.trace()"
 ./lstm_transducer_stateless2/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
  --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
  --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Greedy search from the raw checkpoint, once per --max-sym-per-frame value.
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./lstm_transducer_stateless2/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # The beam-search style methods, each with --beam-size 4.
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./lstm_transducer_stateless2/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # RNN-LM shallow-fusion decoding; runs only when the event label is
 # "shallow-fusion".
 if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"shallow-fusion" ]]; then
  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  # Clone without LFS objects, then fetch only the LM checkpoint.
  GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
  lm_repo=$(basename $lm_repo_url)
  pushd $lm_repo
  git lfs pull --include "exp/pretrained.pt"
  # Rename to epoch-88.pt; decode.py is invoked with --lm-epoch 88 below.
  mv exp/pretrained.pt exp/epoch-88.pt
  popd
  mkdir -p lstm_transducer_stateless2/exp
  # Alias the acoustic-model checkpoint as epoch-999.pt (--epoch 999 below).
  ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh lstm_transducer_stateless2/exp
  log "Decoding test-clean and test-other with RNN LM"
  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 999 \
    --avg 1 \
    --exp-dir lstm_transducer_stateless2/exp \
    --max-duration 600 \
    --decoding-method modified_beam_search_lm_shallow_fusion \
    --beam 4 \
    --use-shallow-fusion 1 \
    --lm-type rnn \
    --lm-exp-dir $lm_repo/exp \
    --lm-epoch 88 \
    --lm-avg 1 \
    --lm-scale 0.3 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1
 fi
 # LODR decoding (RNN LM plus a bi-gram LM); runs only when the event label
 # is "LODR".
 if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
  bigram_repo_url=https://huggingface.co/marcoyang/librispeech_bigram
  log "Download bi-gram LM from ${bigram_repo_url}"
  # Clone without LFS objects, then fetch only the bi-gram FST.
  GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
  bigramlm_repo=$(basename $bigram_repo_url)
  pushd $bigramlm_repo
  git lfs pull --include "2gram.fst.txt"
  # abs_repo was produced with realpath, so this copy works from inside the
  # bi-gram repository.
  cp 2gram.fst.txt $abs_repo/data/lang_bpe_500/.
  popd
  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
  lm_repo=$(basename $lm_repo_url)
  pushd $lm_repo
  git lfs pull --include "exp/pretrained.pt"
  # Rename to epoch-88.pt; decode.py is invoked with --lm-epoch 88 below.
  mv exp/pretrained.pt exp/epoch-88.pt
  popd
  mkdir -p lstm_transducer_stateless2/exp
  # Alias the acoustic-model checkpoint as epoch-999.pt (--epoch 999 below).
  ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh lstm_transducer_stateless2/exp
  log "Decoding test-clean and test-other"
  # The LM averaging option is spelled --lm-avg, in line with --lm-epoch and
  # --lm-exp-dir and with the shallow-fusion invocation of this same script.
  # NOTE(review): it was previously --rnn-lm-avg; confirm against decode.py's
  # argument parser.
  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 999 \
    --avg 1 \
    --exp-dir lstm_transducer_stateless2/exp \
    --max-duration 600 \
    --decoding-method modified_beam_search_LODR \
    --beam 4 \
    --use-shallow-fusion 1 \
    --lm-type rnn \
    --lm-exp-dir $lm_repo/exp \
    --lm-scale 0.4 \
    --lm-epoch 88 \
    --lm-avg 1 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1 \
    --tokens-ngram 2 \
    --ngram-lm-scale -0.16
 fi
 # Test-set decoding without an external LM; runs only when the event name is
 # "schedule".
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
  mkdir -p lstm_transducer_stateless2/exp
  # Alias the checkpoint as epoch-999.pt (--epoch 999 below).
  ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh lstm_transducer_stateless2/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"
    ./lstm_transducer_stateless2/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --exp-dir lstm_transducer_stateless2/exp
  done
  # Remove the checkpoint link created above from the recipe's exp directory.
  rm lstm_transducer_stateless2/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
+++ b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
@ -1,233 +0,0 @@
 #!/usr/bin/env bash
 #
 set -e
 # Print a timestamped message tagged with the caller's location.
 # Arguments: the message words, joined with "$*"; `echo -e` interprets any
 #            backslash escapes they contain.
 # Outputs:   "<date> <time> (<file>:<line>:<function>) <message>" on stdout.
 log() {
  # Adapted from espnet.
  local caller_path=${BASH_SOURCE[1]}
  local caller_file=${caller_path##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local origin="${caller_file}:${caller_line}:${caller_func}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
 }
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
 ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
 popd
 log  "Install ncnn and pnnx"
 # We are using a modified ncnn here. Will try to merge it to the official repo
 # of ncnn
 git clone https://github.com/csukuangfj/ncnn
 pushd ncnn
 git submodule init
 git submodule update python/pybind11
 python3 setup.py bdist_wheel
 ls -lh dist/
 pip install dist/*.whl
 cd tools/pnnx
 mkdir build
 cd build
 cmake ..
 make -j4 pnnx
 ./src/pnnx || echo "pass"
 popd
 log "Test exporting to pnnx format"
 ./lstm_transducer_stateless2/export.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --pnnx 1
 ./ncnn/tools/pnnx/build/src/pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 ./ncnn/tools/pnnx/build/src/pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 ./ncnn/tools/pnnx/build/src/pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 ./lstm_transducer_stateless2/ncnn-decode.py \
 --bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
 --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
 --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
 --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
 --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
 --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
 --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
 $repo/test_wavs/1089-134686-0001.wav
 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
 --bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
 --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
 --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
 --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
 --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
 --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
 --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
 $repo/test_wavs/1089-134686-0001.wav
 log "Test exporting with torch.jit.trace()"
 ./lstm_transducer_stateless2/export.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --jit-trace 1
 log "Decode with models exported by torch.jit.trace()"
 ./lstm_transducer_stateless2/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
  --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
  --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 log "Test exporting to ONNX"
 ./lstm_transducer_stateless2/export.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --onnx 1
 log "Decode with ONNX models "
 ./lstm_transducer_stateless2/streaming-onnx-decode.py \
  --bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo//exp/encoder.onnx \
  --decoder-model-filename $repo/exp/decoder.onnx \
  --joiner-model-filename $repo/exp/joiner.onnx \
  --joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
  --joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
 $repo/test_wavs/1089-134686-0001.wav
 ./lstm_transducer_stateless2/streaming-onnx-decode.py \
  --bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo//exp/encoder.onnx \
  --decoder-model-filename $repo/exp/decoder.onnx \
  --joiner-model-filename $repo/exp/joiner.onnx \
  --joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
  --joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
 $repo/test_wavs/1221-135766-0001.wav
 ./lstm_transducer_stateless2/streaming-onnx-decode.py \
  --bpe-model-filename $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo//exp/encoder.onnx \
  --decoder-model-filename $repo/exp/decoder.onnx \
  --joiner-model-filename $repo/exp/joiner.onnx \
  --joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
  --joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
 $repo/test_wavs/1221-135766-0002.wav
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./lstm_transducer_stateless2/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./lstm_transducer_stateless2/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"shallow-fusion" ]]; then
  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  git clone $lm_repo_url
  lm_repo=$(basename $lm_repo_url)
  pushd $lm_repo
  git lfs pull --include "exp/pretrained.pt"
  cd exp
  ln -s pretrained.pt epoch-88.pt
  popd
  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $repo/exp \
    --lang-dir $repo/data/lang_bpe_500 \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --max-duration 600 \
    --decoding-method modified_beam_search_rnnlm_shallow_fusion \
    --beam 4 \
    --rnn-lm-scale 0.3 \
    --rnn-lm-exp-dir $lm_repo/exp \
    --rnn-lm-epoch 88 \
    --rnn-lm-avg 1 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1
 fi
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
  mkdir -p lstm_transducer_stateless2/exp
  ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh lstm_transducer_stateless2/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"
    ./lstm_transducer_stateless2/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --exp-dir lstm_transducer_stateless2/exp
  done
  rm lstm_transducer_stateless2/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless-2022-03-12.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless2-2022-04-29.sh
@ -23,7 +23,6 @@ popd
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-04-29.sh
@ -22,7 +22,6 @@ popd
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless3-2022-05-13.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
@ -27,14 +26,6 @@ ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
 ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
 popd
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless3/export.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --onnx 1
 log "Export to torchscript model"
 ./pruned_transducer_stateless3/export.py \
@ -51,30 +42,8 @@ log "Export to torchscript model"
  --avg 1 \
  --jit-trace 1
 ls -lh $repo/exp/*.onnx
 ls -lh $repo/exp/*.pt
 log "Decode with ONNX models"
 ./pruned_transducer_stateless3/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder.onnx \
  --onnx-decoder-filename $repo/exp/decoder.onnx \
  --onnx-joiner-filename $repo/exp/joiner.onnx \
  --onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj.onnx \
  --onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj.onnx
 ./pruned_transducer_stateless3/onnx_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo/exp/encoder.onnx \
  --decoder-model-filename $repo/exp/decoder.onnx \
  --joiner-model-filename $repo/exp/joiner.onnx \
  --joiner-encoder-proj-model-filename $repo/exp/joiner_encoder_proj.onnx \
  --joiner-decoder-proj-model-filename $repo/exp/joiner_decoder_proj.onnx \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 log "Decode with models exported by torch.jit.trace()"
 ./pruned_transducer_stateless3/jit_pretrained.py \
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless5-2022-05-13.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-2022-11-11.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
@ -33,6 +32,7 @@ popd
 log "Export to torchscript model"
 ./pruned_transducer_stateless7/export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh
@ -0,0 +1,150 @@
 #!/usr/bin/env bash
 set -e
 # Print a timestamped message tagged with the caller's location.
 # Arguments: the message words, joined with "$*"; `echo -e` interprets any
 #            backslash escapes they contain.
 # Outputs:   "<date> <time> (<file>:<line>:<function>) <message>" on stdout.
 log() {
  # Adapted from espnet.
  local caller_path=${BASH_SOURCE[1]}
  local caller_file=${caller_path##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local origin="${caller_file}:${caller_line}:${caller_func}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
 }
 # Every relative path below is resolved from the LibriSpeech ASR recipe dir.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01
 log "Downloading pre-trained model from $repo_url"
 # GIT_LFS_SKIP_SMUDGE=1 clones without downloading LFS objects; only the
 # files named in the `git lfs pull` calls below are fetched.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 # The clone directory is the last path component of the URL.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 # NOTE(review): these pulls run from inside exp/ while the --include patterns
 # are spelled relative to the repository root; confirm git-lfs resolves them
 # against the root.
 git lfs pull --include "data/lang_bpe_500/HLG.pt"
 git lfs pull --include "data/lang_bpe_500/L.pt"
 git lfs pull --include "data/lang_bpe_500/LG.pt"
 git lfs pull --include "data/lang_bpe_500/Linv.pt"
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "data/lm/G_4_gram.pt"
 git lfs pull --include "exp/cpu_jit.pt"
 git lfs pull --include "exp/pretrained.pt"
 # Alias the checkpoint as epoch-99.pt; export.py is later run with --epoch 99.
 ln -s pretrained.pt epoch-99.pt
 ls -lh *.pt
 popd
 # Export with --jit 1, then decode the three bundled utterances through the
 # transducer and CTC entry points, from both cpu_jit.pt and the checkpoint.
 log "Export to torchscript model"
 ./pruned_transducer_stateless7_ctc/export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --jit 1
 ls -lh $repo/exp/*.pt
 log "Decode with models exported by torch.jit.script()"
 ./pruned_transducer_stateless7_ctc/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --nn-model-filename $repo/exp/cpu_jit.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # CTC decoding from the TorchScript model, once per decoding method.
 for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
    --model-filename $repo/exp/cpu_jit.pt \
    --words-file $repo/data/lang_bpe_500/words.txt  \
    --HLG $repo/data/lang_bpe_500/HLG.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --G $repo/data/lm/G_4_gram.pt \
    --method $m \
    --sample-rate 16000 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # Greedy search from the raw checkpoint, once per --max-sym-per-frame value.
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./pruned_transducer_stateless7_ctc/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # The beam-search style methods, each with --beam-size 4.
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./pruned_transducer_stateless7_ctc/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # CTC decoding from the raw checkpoint, once per decoding method.
 for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
    --checkpoint $repo/exp/pretrained.pt \
    --words-file $repo/data/lang_bpe_500/words.txt  \
    --HLG $repo/data/lang_bpe_500/HLG.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --G $repo/data/lm/G_4_gram.pt \
    --method $m \
    --sample-rate 16000 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Decode test-clean/test-other only when the event name is "schedule" or the
 # event label is "run-decode".
 if [[ "${GITHUB_EVENT_NAME}" == "schedule" ]] || [[ "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  recipe_exp=pruned_transducer_stateless7_ctc/exp
  mkdir -p $recipe_exp
  # Alias the checkpoint as epoch-999.pt (--epoch 999 below).
  ln -s $PWD/$repo/exp/pretrained.pt $recipe_exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh $recipe_exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  # Transducer decoding.
  for search in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $search"
    ./pruned_transducer_stateless7_ctc/decode.py \
      --decoding-method $search \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --exp-dir $recipe_exp
  done
  # CTC decoding.
  for ctc_method in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc/ctc_decode.py \
        --epoch 999 \
        --avg 1 \
        --exp-dir ./$recipe_exp \
        --max-duration $max_duration \
        --use-averaged-model 0 \
        --decoding-method $ctc_method \
        --hlg-scale 0.6 \
        --lm-dir data/lm
  done
  # Remove the checkpoint link created above.
  rm $recipe_exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh
@ -0,0 +1,147 @@
 #!/usr/bin/env bash
 set -e
 # Print a timestamped message tagged with the caller's location.
 # Arguments: the message words, joined with "$*"; `echo -e` interprets any
 #            backslash escapes they contain.
 # Outputs:   "<date> <time> (<file>:<line>:<function>) <message>" on stdout.
 log() {
  # Adapted from espnet.
  local caller_path=${BASH_SOURCE[1]}
  local caller_file=${caller_path##*/}
  local caller_line=${BASH_LINENO[0]}
  local caller_func=${FUNCNAME[1]}
  local origin="${caller_file}:${caller_line}:${caller_func}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
 }
 # Every relative path below is resolved from the LibriSpeech ASR recipe dir.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29
 log "Downloading pre-trained model from $repo_url"
 # GIT_LFS_SKIP_SMUDGE=1 clones without downloading LFS objects; only the
 # files named in the `git lfs pull` calls below are fetched.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 # The clone directory is the last path component of the URL.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 # NOTE(review): these pulls run from inside exp/ while the --include patterns
 # are spelled relative to the repository root; confirm git-lfs resolves them
 # against the root.
 git lfs pull --include "data/lang_bpe_500/HLG.pt"
 git lfs pull --include "data/lang_bpe_500/L.pt"
 git lfs pull --include "data/lang_bpe_500/LG.pt"
 git lfs pull --include "data/lang_bpe_500/Linv.pt"
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/cpu_jit.pt"
 git lfs pull --include "exp/pretrained.pt"
 # Alias the checkpoint as epoch-99.pt; export.py is later run with --epoch 99.
 ln -s pretrained.pt epoch-99.pt
 ls -lh *.pt
 popd
 # Export with --jit 1, then decode the three bundled utterances through the
 # transducer and CTC entry points, from both cpu_jit.pt and the checkpoint.
 log "Export to torchscript model"
 ./pruned_transducer_stateless7_ctc_bs/export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --jit 1
 ls -lh $repo/exp/*.pt
 log "Decode with models exported by torch.jit.script()"
 ./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --nn-model-filename $repo/exp/cpu_jit.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # CTC decoding from the TorchScript model, once per decoding method.
 for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
    --model-filename $repo/exp/cpu_jit.pt \
    --words-file $repo/data/lang_bpe_500/words.txt  \
    --HLG $repo/data/lang_bpe_500/HLG.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --method $m \
    --sample-rate 16000 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # Greedy search from the raw checkpoint, once per --max-sym-per-frame value.
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # The beam-search style methods, each with --beam-size 4.
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # CTC decoding from the raw checkpoint, once per decoding method.
 for m in ctc-decoding 1best; do
  ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
    --checkpoint $repo/exp/pretrained.pt \
    --words-file $repo/data/lang_bpe_500/words.txt  \
    --HLG $repo/data/lang_bpe_500/HLG.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --method $m \
    --sample-rate 16000 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Decode test-clean/test-other only when the event name is "schedule" or the
 # event label is "run-decode".
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode"  ]]; then
  mkdir -p pruned_transducer_stateless7_ctc_bs/exp
  # Alias the checkpoint as epoch-999.pt (--epoch 999 below).
  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_ctc_bs/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh pruned_transducer_stateless7_ctc_bs/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  # Transducer decoding.
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"
    ./pruned_transducer_stateless7_ctc_bs/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --exp-dir pruned_transducer_stateless7_ctc_bs/exp
  done
  # CTC decoding.
  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/ctc_decode.py \
        --epoch 999 \
        --avg 1 \
        --exp-dir ./pruned_transducer_stateless7_ctc_bs/exp \
        --max-duration $max_duration \
        --use-averaged-model 0 \
        --decoding-method $m \
        --hlg-scale 0.6
  done
  # Remove the checkpoint link created above.
  rm pruned_transducer_stateless7_ctc_bs/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh
@ -0,0 +1,148 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. All arguments form the message; backslash escapes in
  # it are interpreted because echo is invoked with -e.
  local caller_path="${BASH_SOURCE[1]}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_path##*/}:${caller_line}:${caller_func}) $*"
 }
 # All relative paths below are resolved from the LibriSpeech ASR recipe directory.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; the files actually needed are pulled one by one below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 # The clone directory is named after the last component of the URL.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/cpu_jit.pt"
 git lfs pull --include "exp/pretrained.pt"
 git lfs pull --include "exp/encoder_jit_trace.pt"
 git lfs pull --include "exp/decoder_jit_trace.pt"
 git lfs pull --include "exp/joiner_jit_trace.pt"
 cd exp
 # Link the pretrained checkpoint as epoch-99.pt; the export commands later in
 # this script are run with "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 ls -lh *.pt
 # Return to the directory that was current before "pushd $repo".
 popd
 # Step 1: run export.py with --jit 1 on the epoch-99 checkpoint.
 log "Export to torchscript model"
 ./pruned_transducer_stateless7_streaming/export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --decode-chunk-len 32 \
  --epoch 99 \
  --avg 1 \
  --jit 1
 ls -lh $repo/exp/*.pt
 # Step 2: decode the three test wavs with $repo/exp/cpu_jit.pt.
 log "Decode with models exported by torch.jit.script()"
 ./pruned_transducer_stateless7_streaming/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --nn-model-filename $repo/exp/cpu_jit.pt \
  --decode-chunk-len 32 \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Step 3: run jit_trace_export.py on the same checkpoint.
 log "Export to torchscript model by torch.jit.trace()"
 ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --decode-chunk-len 32 \
  --epoch 99 \
  --avg 1
 # Step 4: decode a single test wav with the separate encoder/decoder/joiner
 # traced models.
 log "Decode with models exported by torch.jit.trace()"
 ./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
  --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
  --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
  --decode-chunk-len 32 \
  $repo/test_wavs/1089-134686-0001.wav
 # Step 5: greedy search on the raw checkpoint with --max-sym-per-frame 1, 2 and 3.
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./pruned_transducer_stateless7_streaming/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --decode-chunk-len 32 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # Step 6: the beam-search style methods, all with --beam-size 4.
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./pruned_transducer_stateless7_streaming/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --decode-chunk-len 32 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # Print the two environment variables that gate the decoding below.
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Only run test-set decoding when the event name is "schedule" or the event
 # label is "run-decode". The x"..." prefix keeps the comparison well-formed
 # when a variable is empty.
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode"  ]]; then
  mkdir -p pruned_transducer_stateless7_streaming/exp
  # Link the pretrained checkpoint as epoch-999.pt; presumably this is what
  # "--epoch 999 --avg 1" below resolves to (confirm against decode.py).
  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless7_streaming/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh pruned_transducer_stateless7_streaming/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  num_decode_stream=200
  # Pass 1: decode.py, once per decoding method.
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "decoding with $method"
    ./pruned_transducer_stateless7_streaming/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --decode-chunk-len 32 \
      --exp-dir pruned_transducer_stateless7_streaming/exp
  done
  # Pass 2: streaming_decode.py, once per decoding method.
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"
    # Every option line except the last must end in a backslash. Without the
    # one after --num-decode-streams, --exp-dir was run as a separate command,
    # which fails under "set -e" and leaves streaming_decode.py without its
    # experiment directory.
    ./pruned_transducer_stateless7_streaming/streaming_decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --decode-chunk-len 32 \
      --num-decode-streams $num_decode_stream \
      --exp-dir pruned_transducer_stateless7_streaming/exp
  done
  # Remove the checkpoint link(s) created above.
  rm pruned_transducer_stateless7_streaming/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh
+++ b/.github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh
@ -0,0 +1,115 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. All arguments form the message; backslash escapes in
  # it are interpreted because echo is invoked with -e.
  local caller_path="${BASH_SOURCE[1]}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_path##*/}:${caller_line}:${caller_func}) $*"
 }
 # All relative paths below are resolved from the LibriSpeech ASR recipe directory.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; the files actually needed are pulled one by one below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 # The clone directory is named after the last component of the URL.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 # NOTE(review): the pulls below run from inside exp/ while the --include
 # patterns are written relative to the repository root; confirm git lfs
 # resolves them as intended from a subdirectory.
 pushd $repo/exp
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/cpu_jit.pt"
 git lfs pull --include "exp/pretrained.pt"
 # Link the pretrained checkpoint as epoch-99.pt; export.py below is run with
 # "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 ls -lh *.pt
 popd
 # First decode with the cpu_jit.pt that was pulled from the model repo.
 log "Decode with models exported by torch.jit.script()"
 ./pruned_transducer_stateless8/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --nn-model-filename $repo/exp/cpu_jit.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Run export.py with --jit 1 (presumably this rewrites $repo/exp/cpu_jit.pt;
 # confirm against export.py), then repeat the same decode.
 log "Export to torchscript model"
 ./pruned_transducer_stateless8/export.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model false \
  --epoch 99 \
  --avg 1 \
  --jit 1
 ls -lh $repo/exp/*.pt
 log "Decode with models exported by torch.jit.script()"
 ./pruned_transducer_stateless8/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --nn-model-filename $repo/exp/cpu_jit.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Greedy search on the raw checkpoint with --max-sym-per-frame 1, 2 and 3.
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./pruned_transducer_stateless8/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # The beam-search style methods, all with --beam-size 4.
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./pruned_transducer_stateless8/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # Print the two environment variables that gate the decoding below.
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Only run test-set decoding when the event name is "schedule" or the event
 # label is "run-decode". The x"..." prefix keeps the comparison well-formed
 # when a variable is empty.
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode"  ]]; then
  mkdir -p pruned_transducer_stateless8/exp
  # Link the pretrained checkpoint as epoch-999.pt; presumably this is what
  # "--epoch 999 --avg 1" below resolves to (confirm against decode.py).
  ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless8/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh pruned_transducer_stateless8/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  # Run decode.py once per decoding method.
  for method in greedy_search fast_beam_search modified_beam_search; do
    log "Decoding with $method"
    ./pruned_transducer_stateless8/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --exp-dir pruned_transducer_stateless8/exp
  done
  # Remove the checkpoint link(s) created above.
  rm pruned_transducer_stateless8/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh
+++ b/.github/scripts/run-librispeech-streaming-pruned-transducer-stateless2-2022-06-26.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
--- a/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
+++ b/.github/scripts/run-librispeech-transducer-stateless2-2022-04-19.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh
+++ b/.github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh
@ -0,0 +1,102 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. All arguments form the message; backslash escapes in
  # it are interpreted because echo is invoked with -e.
  local caller_path="${BASH_SOURCE[1]}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_path##*/}:${caller_line}:${caller_func}) $*"
 }
 # All relative paths below are resolved from the LibriSpeech ASR recipe directory.
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08
 log "Downloading pre-trained model from $repo_url"
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; the files actually needed are pulled one by one below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 # The clone directory is named after the last component of the URL.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 # NOTE(review): the pulls below run from inside exp/ while the --include
 # patterns are written relative to the repository root; confirm git lfs
 # resolves them as intended from a subdirectory.
 pushd $repo/exp
 git lfs pull --include "data/lang_bpe_500/3gram.pt"
 git lfs pull --include "data/lang_bpe_500/4gram.pt"
 git lfs pull --include "data/lang_bpe_500/L.pt"
 git lfs pull --include "data/lang_bpe_500/LG.pt"
 git lfs pull --include "data/lang_bpe_500/Linv.pt"
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/cpu_jit.pt"
 git lfs pull --include "exp/pretrained.pt"
 # Link the pretrained checkpoint as epoch-99.pt; export.py below is run with
 # "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 ls -lh *.pt
 popd
 log "Export to torchscript model"
 ./zipformer_mmi/export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --jit 1
 ls -lh $repo/exp/*.pt
 # Decode the three test wavs with $repo/exp/cpu_jit.pt.
 log "Decode with models exported by torch.jit.script()"
 ./zipformer_mmi/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --nn-model-filename $repo/exp/cpu_jit.pt \
  --lang-dir $repo/data/lang_bpe_500 \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Decode the same wavs from the raw checkpoint, once per --method value.
 for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
  log "$method"
  ./zipformer_mmi/pretrained.py \
    --method $method \
    --checkpoint $repo/exp/pretrained.pt \
    --lang-dir $repo/data/lang_bpe_500 \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav
 done
 # Print the two environment variables that gate the decoding below.
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Only run test-set decoding when the event name is "schedule" or the event
 # label is "run-decode". The x"..." prefix keeps the comparison well-formed
 # when a variable is empty.
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode"  ]]; then
  mkdir -p zipformer_mmi/exp
  # Link the pretrained checkpoint as epoch-999.pt; presumably this is what
  # "--epoch 999 --avg 1" below resolves to (confirm against decode.py).
  ln -s $PWD/$repo/exp/pretrained.pt zipformer_mmi/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh zipformer_mmi/exp
  log "Decoding test-clean and test-other"
  # use a small value for decoding with CPU
  max_duration=100
  # Run decode.py once per decoding method; --lang-dir points into the cloned
  # model repo rather than at the data/ link created above.
  for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
    log "Decoding with $method"
    ./zipformer_mmi/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --use-averaged-model 0 \
      --nbest-scale 1.2 \
      --hp-scale 1.0 \
      --max-duration $max_duration \
      --lang-dir $repo/data/lang_bpe_500 \
      --exp-dir zipformer_mmi/exp
  done
  # Remove the checkpoint link(s) created above.
  rm zipformer_mmi/exp/*.pt
 fi
--- a/.github/scripts/run-pre-trained-conformer-ctc.sh
+++ b/.github/scripts/run-pre-trained-conformer-ctc.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.flac
 ls -lh $repo/test_wavs/*.flac
 log "CTC decoding"
--- a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh
+++ b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-100h.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh
+++ b/.github/scripts/run-pre-trained-transducer-stateless-librispeech-960h.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh
+++ b/.github/scripts/run-pre-trained-transducer-stateless-modified-2-aishell.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh
+++ b/.github/scripts/run-pre-trained-transducer-stateless-modified-aishell.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-pre-trained-transducer-stateless.sh
+++ b/.github/scripts/run-pre-trained-transducer-stateless.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 for sym in 1 2 3; do
--- a/.github/scripts/run-pre-trained-transducer.sh
+++ b/.github/scripts/run-pre-trained-transducer.sh
@ -19,7 +19,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 log "Beam search decoding"
--- a/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh
+++ b/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh
@ -20,7 +20,6 @@ repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 soxi $repo/test_wavs/*.wav
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
--- a/.github/scripts/test-ncnn-export.sh
+++ b/.github/scripts/test-ncnn-export.sh
@ -0,0 +1,234 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. All arguments form the message; backslash escapes in
  # it are interpreted because echo is invoked with -e.
  local caller_path="${BASH_SOURCE[1]}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_path##*/}:${caller_line}:${caller_func}) $*"
 }
 # Build and install the ncnn Python wheel and the pnnx converter, then put
 # the pnnx binary directory on PATH for the export sections that follow.
 pushd egs/librispeech/ASR
 log  "Install ncnn and pnnx"
 # We are using a modified ncnn here. Will try to merge it to the official repo
 # of ncnn
 git clone https://github.com/csukuangfj/ncnn
 pushd ncnn
 git submodule init
 # Only the python/pybind11 submodule is fetched.
 git submodule update python/pybind11
 python3 setup.py bdist_wheel
 ls -lh dist/
 pip install dist/*.whl
 cd tools/pnnx
 mkdir build
 cd build
 echo "which python3"
 which python3
 #/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3
 # Point CMake at the python3 found on PATH.
 cmake -D Python3_EXECUTABLE=$(which python3) ..
 make -j4 pnnx
 # Run the freshly built binary without arguments; "|| echo" keeps a non-zero
 # exit status from aborting the script under "set -e".
 ./src/pnnx || echo "pass"
 # Return to the directory that was current before "pushd ncnn"
 # (egs/librispeech/ASR), undoing the two cd's above as well.
 popd
 export PATH=$PWD/ncnn/tools/pnnx/build/src:$PATH
 # Section: conv_emformer_transducer_stateless2 -> export-for-ncnn -> pnnx -> ncnn decode.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the export below uses "--epoch 99 --avg 1".
 ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./conv_emformer_transducer_stateless2/export-for-ncnn.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  \
  --num-encoder-layers 12 \
  --chunk-length 32 \
  --cnn-module-kernel 31 \
  --left-context-length 32 \
  --right-context-length 8 \
  --memory-size 32
 # Convert each *-pnnx.pt file with pnnx; the decode step below reads the
 # matching *.ncnn.param / *.ncnn.bin files from the same directory.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: lstm_transducer_stateless2 -> export-for-ncnn -> pnnx -> ncnn decode
 # (both the streaming and the non-streaming decode scripts are run).
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the export below uses "--epoch 99 --avg 1".
 ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./lstm_transducer_stateless2/export-for-ncnn.py \
  --exp-dir $repo/exp \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0
 # Convert each *-pnnx.pt file with pnnx; the decode steps below read the
 # matching *.ncnn.param / *.ncnn.bin files from the same directory.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 # Same model files and wav, this time through ncnn-decode.py.
 python3 ./lstm_transducer_stateless2/ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: pruned_transducer_stateless7_streaming -> export-for-ncnn -> pnnx -> ncnn decode.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the export below uses "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 popd
 # The model-architecture options after the bare "\" line are spelled out
 # explicitly; presumably they must match the pretrained checkpoint (confirm).
 ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --exp-dir $repo/exp \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  \
  --decode-chunk-len 32 \
  --num-encoder-layers "2,4,3,2,4" \
  --feedforward-dims "1024,1024,2048,2048,1024" \
  --nhead "8,8,8,8,8" \
  --encoder-dims "384,384,384,384,384" \
  --attention-dims "192,192,192,192,192" \
  --encoder-unmasked-dims "256,256,256,256,256" \
  --zipformer-downsampling-factors "1,2,4,8,2" \
  --cnn-module-kernels "31,31,31,31,31" \
  --decoder-dim 512 \
  --joiner-dim 512
 # Convert each *-pnnx.pt file with pnnx; the decode step below reads the
 # matching *.ncnn.param / *.ncnn.bin files from the same directory.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: k2fsa-zipformer-chinese-english-mixed -> export-for-ncnn-zh -> pnnx -> ncnn decode.
 # Unlike the other sections this one uses --lang-dir (data/lang_char_bpe)
 # instead of --bpe-model.
 log "=========================================================================="
 repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_char_bpe/L.pt"
 git lfs pull --include "data/lang_char_bpe/L_disambig.pt"
 git lfs pull --include "data/lang_char_bpe/Linv.pt"
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the export below uses "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 popd
 ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
  --lang-dir $repo/data/lang_char_bpe \
  --exp-dir $repo/exp \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --decode-chunk-len 32 \
  --num-encoder-layers "2,4,3,2,4" \
  --feedforward-dims "1024,1024,1536,1536,1024" \
  --nhead "8,8,8,8,8" \
  --encoder-dims "384,384,384,384,384" \
  --attention-dims "192,192,192,192,192" \
  --encoder-unmasked-dims "256,256,256,256,256" \
  --zipformer-downsampling-factors "1,2,4,8,2" \
  --cnn-module-kernels "31,31,31,31,31" \
  --decoder-dim 512 \
  --joiner-dim 512
 # Convert each *-pnnx.pt file with pnnx; the decode step below reads the
 # matching *.ncnn.param / *.ncnn.bin files from the same directory.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_char_bpe/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/0.wav
 # Delete the clone at the end of the section.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
--- a/.github/scripts/test-onnx-export.sh
+++ b/.github/scripts/test-onnx-export.sh
@ -0,0 +1,351 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. All arguments form the message; backslash escapes in
  # it are interpreted because echo is invoked with -e.
  local caller_path="${BASH_SOURCE[1]}"
  local caller_line="${BASH_LINENO[0]}"
  local caller_func="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_path##*/}:${caller_line}:${caller_func}) $*"
 }
 # All relative paths below are resolved from the LibriSpeech ASR recipe directory.
 cd egs/librispeech/ASR
 # Section: pruned_transducer_stateless7_streaming -> jit trace export + ONNX
 # export -> onnx_check -> onnx_pretrained.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the exports below use "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --decode-chunk-len 32 \
  --exp-dir $repo/exp/
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless7_streaming/export-onnx.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --decode-chunk-len 32 \
  --exp-dir $repo/exp/
 ls -lh $repo/exp
 # onnx_check.py is given both the traced TorchScript files and the ONNX files
 # from $repo/exp.
 log "Run onnx_check.py"
 ./pruned_transducer_stateless7_streaming/onnx_check.py \
  --jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
  --jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
  --jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: pruned_transducer_stateless3 -> jit script export + ONNX export ->
 # onnx_check -> onnx_pretrained.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
 cd exp
 # Link the checkpoint as epoch-9999.pt; the exports below use
 # "--epoch 9999 --avg 1".
 ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
 popd
 log "Export via torch.jit.script()"
 ./pruned_transducer_stateless3/export.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 9999 \
  --avg 1 \
  --exp-dir $repo/exp/ \
  --jit 1
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless3/export-onnx.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 9999 \
  --avg 1 \
  --exp-dir $repo/exp/
 ls -lh $repo/exp
 # onnx_check.py is given the scripted model (cpu_jit.pt) and the ONNX files
 # from $repo/exp.
 log "Run onnx_check.py"
 ./pruned_transducer_stateless3/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-9999-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless3/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: pruned_transducer_stateless5 -> jit script export + ONNX export ->
 # onnx_check -> onnx_pretrained.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the exports below use "--epoch 99 --avg 1".
 ln -s pretrained-epoch-39-avg-7.pt epoch-99.pt
 popd
 # Both export commands pass the same model-architecture options.
 log "Export via torch.jit.script()"
 ./pruned_transducer_stateless5/export.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --exp-dir $repo/exp \
  --num-encoder-layers 18 \
  --dim-feedforward 2048 \
  --nhead 8 \
  --encoder-dim 512 \
  --decoder-dim 512 \
  --joiner-dim 512 \
  --jit 1
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless5/export-onnx.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --exp-dir $repo/exp \
  --num-encoder-layers 18 \
  --dim-feedforward 2048 \
  --nhead 8 \
  --encoder-dim 512 \
  --decoder-dim 512 \
  --joiner-dim 512
 ls -lh $repo/exp
 # onnx_check.py is given the scripted model (cpu_jit.pt) and the ONNX files
 # from $repo/exp.
 log "Run onnx_check.py"
 ./pruned_transducer_stateless5/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless5/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: pruned_transducer_stateless7 -> jit script export + ONNX export ->
 # onnx_check -> onnx_pretrained.
 # The section now follows the same open / clean-up / close layout as its
 # siblings: the empty "repo_url=" placeholder and its no-op "rm -rf" are gone,
 # and the clone is removed once the section has finished.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the exports below use "--epoch 99 --avg 1".
 ln -s pretrained.pt epoch-99.pt
 popd
 log "Export via torch.jit.script()"
 ./pruned_transducer_stateless7/export.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --feedforward-dims "1024,1024,2048,2048,1024" \
  --jit 1
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless7/export-onnx.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --feedforward-dims "1024,1024,2048,2048,1024"
 ls -lh $repo/exp
 # onnx_check.py is given the scripted model (cpu_jit.pt) and the ONNX files
 # from $repo/exp.
 log "Run onnx_check.py"
 ./pruned_transducer_stateless7/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless7/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: conv_emformer_transducer_stateless2 -> ONNX export -> onnx_pretrained.
 # No TorchScript export or onnx_check.py step is run in this section.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the export below uses "--epoch 99 --avg 1".
 ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
 popd
 log "Test exporting to ONNX format"
 ./conv_emformer_transducer_stateless2/export-onnx.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --num-encoder-layers 12 \
  --chunk-length 32 \
  --cnn-module-kernel 31 \
  --left-context-length 32 \
  --right-context-length 8 \
  --memory-size 32
 log "Run onnx_pretrained.py"
 ./conv_emformer_transducer_stateless2/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1221-135766-0001.wav
 # Delete the clone before the next section starts.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # Section: lstm_transducer_stateless2 -> jit trace export + ONNX export ->
 # onnx_check -> onnx_pretrained.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 # Clone with GIT_LFS_SKIP_SMUDGE=1 so that LFS objects are not downloaded
 # during the clone; only the two files needed are pulled below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
 cd exp
 # Link the checkpoint as epoch-99.pt; the exports below use "--epoch 99 --avg 1".
 ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./lstm_transducer_stateless2/export.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp/ \
  --jit-trace 1
 log "Test exporting to ONNX format"
 ./lstm_transducer_stateless2/export-onnx.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp
 ls -lh $repo/exp
 # onnx_check.py is given both the traced TorchScript files and the ONNX files
 # from $repo/exp.
 log "Run onnx_check.py"
 ./lstm_transducer_stateless2/onnx_check.py \
  --jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
  --jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
  --jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./lstm_transducer_stateless2/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1221-135766-0001.wav
 # Delete the clone at the end of the section.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
--- a/.github/workflows/build-doc.yml
+++ b/.github/workflows/build-doc.yml
@ -26,6 +26,10 @@ on:
  pull_request:
    types: [labeled]
 concurrency:
  group: build_doc-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  build-doc:
    if: github.event.label.name == 'doc' || github.event_name == 'push'
--- a/.github/workflows/run-aishell-2022-06-20.yml
+++ b/.github/workflows/run-aishell-2022-06-20.yml
@ -34,6 +34,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_aishell_2022_06_20-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_aishell_2022_06_20:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -61,7 +65,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -83,7 +87,7 @@ jobs:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-gigaspeech-2022-05-13.yml
+++ b/.github/workflows/run-gigaspeech-2022-05-13.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_gigaspeech_2022_05_13-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_gigaspeech_2022_05_13:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
--- a/.github/workflows/run-librispeech-2022-03-12.yml
+++ b/.github/workflows/run-librispeech-2022-03-12.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_03_12-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_03_12:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-librispeech-2022-04-29.yml
+++ b/.github/workflows/run-librispeech-2022-04-29.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_04_29-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_04_29:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-librispeech-2022-05-13.yml
+++ b/.github/workflows/run-librispeech-2022-05-13.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_05_13-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_05_13:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-librispeech-2022-11-11-stateless7.yml
+++ b/.github/workflows/run-librispeech-2022-11-11-stateless7.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_11_11_zipformer-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_11_11_zipformer:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-librispeech-2022-11-14-stateless8.yml
+++ b/.github/workflows/run-librispeech-2022-11-14-stateless8.yml
@ -0,0 +1,159 @@
 # Copyright      2022  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # CI workflow for the LibriSpeech pruned_transducer_stateless8 recipe: it
 # prepares test-clean/test-other features and then runs
 # .github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh.
 name: run-librispeech-2022-11-14-stateless8
 # zipformer
 on:
  # Triggered by pushes to master, by labeling a pull request, and nightly.
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 # Only one run per git ref is kept alive; a newer run cancels the older one.
 concurrency:
  group: run_librispeech_2022_11_14_zipformer_stateless8-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_11_14_zipformer_stateless8:
    # For label events the job runs only for the 'ready' or 'run-decode'
    # labels; push and schedule events always run it.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Python interpreter from the matrix, with pip caching keyed on
      # requirements-ci.txt.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Installs every non-comment line of requirements-ci.txt one at a time,
      # then replaces protobuf with a source build pinned to 3.20.*.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # kaldifeat is cached under ~/tmp/kaldifeat and only built on a cache miss.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # The test sets are cached under ~/tmp/download and only downloaded on a
      # cache miss.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      # Manifests are regenerated on every run (this step has no cache guard).
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # fbank features are cached under ~/tmp/fbank-libri and only recomputed on
      # a cache miss.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      # Links the cached features into the recipe's data directory, puts icefall
      # and kaldifeat on PYTHONPATH, and runs the recipe test script. The event
      # name and label are exported as environment variables for that script.
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-pruned-transducer-stateless8-2022-11-14.sh
      # Only for scheduled runs or the 'run-decode' label: greps the
      # "best for test-clean/test-other" lines out of the decoding logs of each
      # search method.
      - name: Display decoding results for librispeech pruned_transducer_stateless8
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./pruned_transducer_stateless8/exp
          cd pruned_transducer_stateless8
          echo "results for pruned_transducer_stateless8"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===fast_beam_search==="
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===modified beam search==="
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      # Uploads the recipe's exp/ directory under the same condition as the
      # display step.
      # NOTE(review): `matrix.torch` is not defined by this job's matrix (only
      # `os` and `python-version` are), so that part of the artifact name is
      # presumably empty; the "ubuntu-18.04" label is also hard-coded while the
      # job runs on matrix.os. Confirm whether the name should be cleaned up.
      - name: Upload decoding results for librispeech pruned_transducer_stateless8
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless8-2022-11-14
          path: egs/librispeech/ASR/pruned_transducer_stateless8/exp/
--- a/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml
+++ b/.github/workflows/run-librispeech-2022-12-01-stateless7-ctc.yml
@ -0,0 +1,163 @@
 # Copyright      2022  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # CI workflow for the LibriSpeech pruned_transducer_stateless7_ctc recipe: it
 # prepares test-clean/test-other features and then runs
 # .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh.
 name: run-librispeech-2022-12-01-stateless7-ctc
 # zipformer
 on:
  # Triggered by pushes to master, by labeling a pull request, and nightly.
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 # Only one run per git ref is kept alive; a newer run cancels the older one.
 # This mirrors the concurrency group used by the other librispeech workflows.
 concurrency:
  group: run_librispeech_2022_12_01_zipformer_ctc-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # NOTE(review): this job id still carries the 2022-11-11 zipformer name even
  # though the workflow is the 2022-12-01 stateless7-ctc one. It is left
  # unchanged so the reported check name stays stable; rename it deliberately
  # if nothing depends on the current name.
  run_librispeech_2022_11_11_zipformer:
    # For label events the job runs only for the 'ready' or 'run-decode'
    # labels; push and schedule events always run it.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Python interpreter from the matrix, with pip caching keyed on
      # requirements-ci.txt.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Installs every non-comment line of requirements-ci.txt one at a time,
      # then replaces protobuf with a source build pinned to 3.20.*.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # kaldifeat is cached under ~/tmp/kaldifeat and only built on a cache miss.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # The test sets are cached under ~/tmp/download and only downloaded on a
      # cache miss.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      # Manifests are regenerated on every run (this step has no cache guard).
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # fbank features are cached under ~/tmp/fbank-libri and only recomputed on
      # a cache miss.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      # Links the cached features into the recipe's data directory, puts icefall
      # and kaldifeat on PYTHONPATH, and runs the recipe test script. The event
      # name and label are exported as environment variables for that script.
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-2022-12-01.sh
      # Only for scheduled runs or the 'run-decode' label: greps the
      # "best for test-clean/test-other" lines out of the decoding logs of each
      # decoding method.
      - name: Display decoding results for librispeech pruned_transducer_stateless7_ctc
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./pruned_transducer_stateless7_ctc/exp
          cd pruned_transducer_stateless7_ctc
          echo "results for pruned_transducer_stateless7_ctc"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===fast_beam_search==="
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===modified beam search==="
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===ctc decoding==="
          find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===1best==="
          find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      # Uploads the recipe's exp/ directory under the same condition as the
      # display step.
      # NOTE(review): `matrix.torch` is not defined by this job's matrix (only
      # `os` and `python-version` are), so that part of the artifact name is
      # presumably empty; the "ubuntu-18.04" label is also hard-coded while the
      # job runs on matrix.os. Confirm whether the name should be cleaned up.
      - name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-ctc-2022-12-01
          path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc/exp/
--- a/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml
+++ b/.github/workflows/run-librispeech-2022-12-08-zipformer-mmi.yml
@ -0,0 +1,167 @@
 # Copyright      2022  Zengwei Yao
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # CI workflow for the LibriSpeech zipformer_mmi recipe: it prepares
 # test-clean/test-other features and then runs
 # .github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh.
 name: run-librispeech-2022-12-08-zipformer-mmi
 # zipformer
 on:
  # Triggered by pushes to master, by labeling a pull request, and nightly.
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 # Only one run per git ref is kept alive; a newer run cancels the older one.
 concurrency:
  group: run_librispeech_2022_12_08_zipformer-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_12_08_zipformer:
    # For label events the job runs only for the 'ready' or 'run-decode'
    # labels; push and schedule events always run it.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Python interpreter from the matrix, with pip caching keyed on
      # requirements-ci.txt.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Installs every non-comment line of requirements-ci.txt one at a time,
      # then replaces protobuf with a source build pinned to 3.20.*.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # kaldifeat is cached under ~/tmp/kaldifeat and only built on a cache miss.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # The test sets are cached under ~/tmp/download and only downloaded on a
      # cache miss.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      # Manifests are regenerated on every run (this step has no cache guard).
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # fbank features are cached under ~/tmp/fbank-libri and only recomputed on
      # a cache miss.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      # Links the cached features into the recipe's data directory, puts icefall
      # and kaldifeat on PYTHONPATH, and runs the recipe test script. The event
      # name and label are exported as environment variables for that script.
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-zipformer-mmi-2022-12-08.sh
      # Only for scheduled runs or the 'run-decode' label: greps the
      # "best for test-clean/test-other" lines out of the decoding logs of each
      # decoding method. The recipe directory is spelled `zipformer_mmi` (with an
      # underscore), the same directory the upload step below reads from.
      - name: Display decoding results for librispeech zipformer-mmi
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./zipformer_mmi/exp
          cd zipformer_mmi
          echo "results for zipformer-mmi"
          echo "===1best==="
          find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===nbest==="
          find exp/nbest -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/nbest -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===nbest-rescoring-LG==="
          find exp/nbest-rescoring-LG -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/nbest-rescoring-LG -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===nbest-rescoring-3-gram==="
          find exp/nbest-rescoring-3-gram -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/nbest-rescoring-3-gram -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===nbest-rescoring-4-gram==="
          find exp/nbest-rescoring-4-gram -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/nbest-rescoring-4-gram -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      # Uploads the recipe's exp/ directory under the same condition as the
      # display step.
      # NOTE(review): `matrix.torch` is not defined by this job's matrix (only
      # `os` and `python-version` are), so that part of the artifact name is
      # presumably empty; the "ubuntu-18.04" label is also hard-coded while the
      # job runs on matrix.os. Confirm whether the name should be cleaned up.
      - name: Upload decoding results for librispeech zipformer-mmi
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-zipformer_mmi-2022-12-08
          path: egs/librispeech/ASR/zipformer_mmi/exp/
--- a/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml
+++ b/.github/workflows/run-librispeech-2022-12-15-stateless7-ctc-bs.yml
@ -0,0 +1,163 @@
 # Copyright      2022  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # CI workflow for the LibriSpeech pruned_transducer_stateless7_ctc_bs recipe:
 # it prepares test-clean/test-other features and then runs
 # .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh.
 name: run-librispeech-2022-12-15-stateless7-ctc-bs
 # zipformer
 on:
  # Triggered by pushes to master, by labeling a pull request, and nightly.
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 # Only one run per git ref is kept alive; a newer run cancels the older one.
 # This mirrors the concurrency group used by the other librispeech workflows.
 concurrency:
  group: run_librispeech_2022_12_15_zipformer_ctc_bs-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_12_15_zipformer_ctc_bs:
    # For label events the job runs only for the 'run-decode' or 'blank-skip'
    # labels; push and schedule events always run it.
    if: github.event.label.name == 'run-decode' || github.event.label.name == 'blank-skip' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Python interpreter from the matrix, with pip caching keyed on
      # requirements-ci.txt.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Installs every non-comment line of requirements-ci.txt one at a time,
      # then replaces protobuf with a source build pinned to 3.20.*.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # kaldifeat is cached under ~/tmp/kaldifeat and only built on a cache miss.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # The test sets are cached under ~/tmp/download and only downloaded on a
      # cache miss.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      # Manifests are regenerated on every run (this step has no cache guard).
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # fbank features are cached under ~/tmp/fbank-libri and only recomputed on
      # a cache miss.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      # Links the cached features into the recipe's data directory, puts icefall
      # and kaldifeat on PYTHONPATH, and runs the recipe test script. The event
      # name and label are exported as environment variables for that script.
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-pruned-transducer-stateless7-ctc-bs-2022-12-15.sh
      # Only for scheduled runs or the 'run-decode' label: greps the
      # "best for test-clean/test-other" lines out of the decoding logs of each
      # decoding method.
      - name: Display decoding results for librispeech pruned_transducer_stateless7_ctc_bs
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./pruned_transducer_stateless7_ctc_bs/exp
          cd pruned_transducer_stateless7_ctc_bs
          echo "results for pruned_transducer_stateless7_ctc_bs"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===fast_beam_search==="
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===modified beam search==="
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===ctc decoding==="
          find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/ctc-decoding -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===1best==="
          find exp/1best -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/1best -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      # Uploads the recipe's exp/ directory under the same condition as the
      # display step.
      # NOTE(review): `matrix.torch` is not defined by this job's matrix (only
      # `os` and `python-version` are), so that part of the artifact name is
      # presumably empty; the "ubuntu-18.04" label is also hard-coded while the
      # job runs on matrix.os. Confirm whether the name should be cleaned up.
      - name: Upload decoding results for librispeech pruned_transducer_stateless7_ctc_bs
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-pruned_transducer_stateless7-ctc-bs-2022-12-15
          path: egs/librispeech/ASR/pruned_transducer_stateless7_ctc_bs/exp/
--- a/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml
+++ b/.github/workflows/run-librispeech-2022-12-29-stateless7-streaming.yml
@ -0,0 +1,172 @@
 # Copyright      2022  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-librispeech-2022-12-29-stateless7-streaming
 # zipformer
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_12_29_zipformer_streaming-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # CI job: runs the pre-trained pruned_transducer_stateless7_streaming checks
  # on a single OS / Python combination.
  run_librispeech_2022_12_29_zipformer_streaming:
    # Run for push and schedule events, or when the triggering label is
    # 'ready', 'run-decode' or 'streaming-zipformer'.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event.label.name == 'streaming-zipformer' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # The matrix defines only `os` and `python-version`.
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      # fetch-depth: 0 asks actions/checkout for the full history instead of
      # a shallow clone.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Install the matrix Python version; pip caching uses the
      # requirements-ci.txt files as the dependency path.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Install every requirement from requirements-ci.txt with its own
      # `pip install` call, then reinstall protobuf 3.20.x from source.
      - name: Install Python dependencies
        run: |
          # -L 1 runs pip once per requirement line. Do not add -n here:
          # GNU xargs treats -n and -L as mutually exclusive.
          grep -v '^#' ./requirements-ci.txt  | xargs -L 1 pip install
          pip uninstall -y protobuf
          # Quote the specifier so the shell can never glob-expand the `*`.
          pip install --no-binary protobuf 'protobuf==3.20.*'
      # Restore a previously built kaldifeat from ~/tmp/kaldifeat.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      # Only run the install script when the kaldifeat cache step had no hit.
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # The downloaded test sets are cached under a fixed key and fetched
      # again only on a cache miss.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      # Manifest preparation has no cache guard and runs on every invocation.
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # fbank features are cached in ~/tmp/fbank-libri and recomputed only on
      # a cache miss.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      # Link the fbank directory into the recipe, put the repository root and
      # kaldifeat on PYTHONPATH, then run the recipe's CI script.
      - name: Inference with pre-trained model
        shell: bash
        env:
          # Exposed to the CI script; presumably it uses them to decide how
          # much decoding to run -- confirm in the script.
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-pruned-transducer-stateless7-streaming-2022-12-29.sh
      # Print the experiment directory tree, then for every decoding method
      # the "best for test-clean" / "best for test-other" log lines.
      - name: Display decoding results for librispeech pruned_transducer_stateless7_streaming
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./pruned_transducer_stateless7_streaming/exp
          cd pruned_transducer_stateless7_streaming
          echo "results for pruned_transducer_stateless7_streaming"
          # show_best TITLE DIR: print the banner, then the sorted
          # "best for ..." log lines for test-clean followed by test-other.
          show_best() {
            echo "===$1==="
            local subset
            for subset in test-clean test-other; do
              find "$2" -name "log-*" -exec grep -n --color "best for $subset" {} + | sort -n -k2
            done
          }
          show_best "greedy search" exp/greedy_search
          show_best "fast_beam_search" exp/fast_beam_search
          show_best "modified beam search" exp/modified_beam_search
          show_best "streaming greedy search" exp/streaming/greedy_search
          show_best "streaming fast_beam_search" exp/streaming/fast_beam_search
          show_best "streaming modified beam search" exp/streaming/modified_beam_search
      # Upload the recipe's exp/ directory as a build artifact.
      - name: Upload decoding results for librispeech pruned_transducer_stateless7_streaming
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          # The name uses only matrix keys this job defines (python-version,
          # os); an undefined matrix key would expand to an empty string.
          name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu-pruned_transducer_stateless7-streaming-2022-12-29
          path: egs/librispeech/ASR/pruned_transducer_stateless7_streaming/exp/
--- a/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml
+++ b/.github/workflows/run-librispeech-conformer-ctc3-2022-11-28.yml
@ -0,0 +1,155 @@
 # Copyright      2022  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-librispeech-conformer-ctc3-2022-11-28
 # conformer_ctc3
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_11_28_conformer_ctc3-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # CI job: runs the pre-trained conformer_ctc3 checks on a single
  # OS / Python combination.
  run_librispeech_2022_11_28_conformer_ctc3:
    # Run for push and schedule events, or when the triggering label is
    # 'ready' or 'run-decode'.
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # The matrix defines only `os` and `python-version`.
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      # fetch-depth: 0 asks actions/checkout for the full history instead of
      # a shallow clone.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Install the matrix Python version; pip caching uses the
      # requirements-ci.txt files as the dependency path.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Install every requirement from requirements-ci.txt with its own
      # `pip install` call, then reinstall protobuf 3.20.x from source.
      - name: Install Python dependencies
        run: |
          # -L 1 runs pip once per requirement line. Do not add -n here:
          # GNU xargs treats -n and -L as mutually exclusive.
          grep -v '^#' ./requirements-ci.txt  | xargs -L 1 pip install
          pip uninstall -y protobuf
          # Quote the specifier so the shell can never glob-expand the `*`.
          pip install --no-binary protobuf 'protobuf==3.20.*'
      # Restore a previously built kaldifeat from ~/tmp/kaldifeat.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      # Only run the install script when the kaldifeat cache step had no hit.
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # The downloaded test sets are cached under a fixed key and fetched
      # again only on a cache miss.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      # Manifest preparation has no cache guard and runs on every invocation.
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # fbank features are cached in ~/tmp/fbank-libri and recomputed only on
      # a cache miss.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      # Link the fbank directory into the recipe, put the repository root and
      # kaldifeat on PYTHONPATH, then run the recipe's CI script.
      - name: Inference with pre-trained model
        shell: bash
        env:
          # Exposed to the CI script; presumably it uses them to decide how
          # much decoding to run -- confirm in the script.
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-conformer-ctc3-2022-11-28.sh
      # Print the experiment directory tree, then for each decoding method
      # the "best for test-clean" / "best for test-other" log lines.
      - name: Display decoding results for librispeech conformer_ctc3
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          cd egs/librispeech/ASR/
          tree ./conformer_ctc3/exp
          cd conformer_ctc3
          echo "results for conformer_ctc3"
          # The banner text equals the directory name under exp/ for both methods.
          for method in ctc-decoding 1best; do
            echo "===$method==="
            for subset in test-clean test-other; do
              find "exp/$method" -name "log-*" -exec grep -n --color "best for $subset" {} + | sort -n -k2
            done
          done
      # Upload the recipe's exp/ directory as a build artifact.
      - name: Upload decoding results for librispeech conformer_ctc3
        uses: actions/upload-artifact@v2
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
        with:
          # The name uses only matrix keys this job defines (python-version,
          # os); an undefined matrix key would expand to an empty string.
          name: python-${{ matrix.python-version }}-${{ matrix.os }}-cpu-conformer_ctc3-2022-11-28
          path: egs/librispeech/ASR/conformer_ctc3/exp/
--- a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
+++ b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
@ -16,9 +16,13 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_lstm_transducer_stateless2_2022_09_03:
-    if: github.event.label.name == 'ready' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'ncnn' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
+    if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@ -43,7 +47,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -102,12 +106,12 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
-          .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
+          .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
      - name: Display decoding results for lstm_transducer_stateless2
        if: github.event_name == 'schedule'
@ -135,13 +139,25 @@ jobs:
          cd egs/librispeech/ASR
          tree lstm_transducer_stateless2/exp
          cd lstm_transducer_stateless2/exp
-          echo "===modified_beam_search_rnnlm_shallow_fusion==="
+          echo "===modified_beam_search_lm_shallow_fusion==="
-          find modified_beam_search_rnnlm_shallow_fusion  -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
+          echo "===Using RNNLM==="
-          find modified_beam_search_rnnlm_shallow_fusion  -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
+          find modified_beam_search_lm_shallow_fusion  -name "log-*rnn*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find modified_beam_search_lm_shallow_fusion  -name "log-*rnn*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      - name: Display decoding results for lstm_transducer_stateless2
        if: github.event.label.name == 'LODR'
        shell: bash
        run: |
          cd egs/librispeech/ASR
          tree lstm_transducer_stateless2/exp
          cd lstm_transducer_stateless2/exp
          echo "===modified_beam_search_rnnlm_LODR==="
          find modified_beam_search_LODR  -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find modified_beam_search_LODR  -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      - name: Upload decoding results for lstm_transducer_stateless2
        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion'
+        if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-18.04-cpu-lstm_transducer_stateless2-2022-09-03
          path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
--- a/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml
+++ b/.github/workflows/run-librispeech-pruned-transducer-stateless3-2022-05-13.yml
@ -33,9 +33,13 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_pruned_transducer_stateless3_2022_05_13-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_pruned_transducer_stateless3_2022_05_13:
-    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
+    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml
+++ b/.github/workflows/run-librispeech-streaming-transducer-stateless2-2022-06-26.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_streaming_2022_06_26-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_streaming_2022_06_26:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
+++ b/.github/workflows/run-librispeech-transducer-stateless2-2022-04-19.yml
@ -33,6 +33,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_librispeech_2022_04_19-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_2022_04_19:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -60,7 +64,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -119,7 +123,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-conformer-ctc.yml
+++ b/.github/workflows/run-pretrained-conformer-ctc.yml
@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]
 concurrency:
  group: run_pre_trained_conformer_ctc-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_conformer_ctc:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
@ -50,7 +54,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -69,7 +73,7 @@ jobs:
      - name: Inference with pre-trained model
        shell: bash
        run: |
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-100h.yml
@ -32,6 +32,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_transducer_stateless_multi_datasets_librispeech_100h:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -59,7 +63,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -118,7 +122,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-librispeech-multi-datasets.yml
@ -32,6 +32,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_transducer_stateless_multi_datasets_librispeech_960h:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -59,7 +63,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -118,7 +122,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-2-aishell.yml
@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]
 concurrency:
  group: run_pre_trained_transducer_stateless_modified_2_aishell-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_transducer_stateless_modified_2_aishell:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
@ -50,7 +54,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -69,7 +73,7 @@ jobs:
      - name: Inference with pre-trained model
        shell: bash
        run: |
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless-modified-aishell.yml
@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]
 concurrency:
  group: run_pre_trained_transducer_stateless_modified_aishell-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_transducer_stateless_modified_aishell:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
@ -50,7 +54,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -69,7 +73,7 @@ jobs:
      - name: Inference with pre-trained model
        shell: bash
        run: |
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-transducer-stateless.yml
+++ b/.github/workflows/run-pretrained-transducer-stateless.yml
@ -32,6 +32,10 @@ on:
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_pre_trained_transducer_stateless-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_transducer_stateless:
    if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
@ -59,7 +63,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -118,7 +122,7 @@ jobs:
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-pretrained-transducer.yml
+++ b/.github/workflows/run-pretrained-transducer.yml
@ -23,6 +23,10 @@ on:
  pull_request:
    types: [labeled]
 concurrency:
  group: run_pre_trained_transducer-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_pre_trained_transducer:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
@ -50,7 +54,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -69,7 +73,7 @@ jobs:
      - name: Inference with pre-trained model
        shell: bash
        run: |
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-ptb-rnn-lm.yml
+++ b/.github/workflows/run-ptb-rnn-lm.yml
@ -0,0 +1,71 @@
 name: run-ptb-rnn-lm-training
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: run_ptb_rnn_lm_training-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # CI job: prepares the PTB data and runs a short RNN LM training on a
  # single OS / Python combination.
  run_ptb_rnn_lm_training:
    # Run for push and schedule events, or when the triggering label is
    # 'ready' or 'rnnlm'.
    if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so YAML keeps the version as a string.
        python-version: ["3.8"]
      fail-fast: false
    steps:
      # fetch-depth: 0 asks actions/checkout for the full history instead of
      # a shallow clone.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      # Install the matrix Python version; pip caching uses the
      # requirements-ci.txt files as the dependency path.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Install every requirement from requirements-ci.txt except kaldifst,
      # one `pip install` call per line, then reinstall protobuf 3.20.x from
      # source.
      - name: Install Python dependencies
        run: |
          # -L 1 runs pip once per requirement line. Do not add -n here:
          # GNU xargs treats -n and -L as mutually exclusive.
          grep -v '^#' ./requirements-ci.txt  | grep -v kaldifst | xargs -L 1 pip install
          pip uninstall -y protobuf
          # Quote the specifier so the shell can never glob-expand the `*`.
          pip install --no-binary protobuf 'protobuf==3.20.*'
      # Both steps below put the repository root on PYTHONPATH and run from
      # egs/ptb/LM.
      - name: Prepare data
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          cd egs/ptb/LM
          ./prepare.sh
      # NOTE(review): the flag values look chosen to keep the CI run short
      # (one process, 5 epochs) -- confirm their meaning in train-rnn-lm.sh.
      - name: Run training
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          cd egs/ptb/LM
          ./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2
      # Upload egs/ptb/LM/my-rnnlm-exp/ as a build artifact.
      # NOTE(review): the step-level `if` repeats the job-level condition, so
      # it is already true whenever this job runs.
      - name: Upload pretrained models
        uses: actions/upload-artifact@v2
        if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
        with:
          name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
          path: egs/ptb/LM/my-rnnlm-exp/
--- a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml
+++ b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml
@ -23,8 +23,12 @@ on:
  pull_request:
    types: [labeled]
 concurrency:
  group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }}
  cancel-in-progress: true
 jobs:
-  run_librispeech_pruned_transducer_stateless3_2022_05_13:
+  run_wenetspeech_pruned_transducer_stateless2:
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'wenetspeech'
    runs-on: ${{ matrix.os }}
    strategy:
@ -50,7 +54,7 @@ jobs:
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
@ -72,7 +76,7 @@ jobs:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
-          sudo apt-get -qq install git-lfs tree sox
+          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
--- a/.github/workflows/run-yesno-recipe.yml
+++ b/.github/workflows/run-yesno-recipe.yml
@ -21,17 +21,21 @@ on:
    branches:
      - master
  pull_request:
-    types: [labeled]
+    branches:
      - master
 concurrency:
  group: run-yesno-recipe-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run-yesno-recipe:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # os: [ubuntu-18.04, macos-10.15]
        # TODO: enable macOS for CPU testing
-        os: [ubuntu-18.04]
+        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
@ -61,9 +65,9 @@ jobs:
      - name: Install Python dependencies
        run: |
-          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
+          grep -v '^#' ./requirements-ci.txt  | grep -v kaldifst | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
      - name: Run yesno recipe
        shell: bash
--- a/.github/workflows/style_check.yml
+++ b/.github/workflows/style_check.yml
@ -24,6 +24,10 @@ on:
    branches:
      - master
 concurrency:
  group: style_check-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  style_check:
    runs-on: ${{ matrix.os }}
@ -45,17 +49,18 @@ jobs:
      - name: Install Python dependencies
        run: |
-          python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2 click==8.0.4
+          python3 -m pip install --upgrade pip black==22.3.0 flake8==5.0.4 click==8.1.0
-          # See https://github.com/psf/black/issues/2964
+          # Click issue fixed in https://github.com/psf/black/pull/2966
          # The version of click should be selected from 8.0.0, 8.0.1, 8.0.2, 8.0.3, and 8.0.4
      - name: Run flake8
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --show-source --statistics
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          flake8 .
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 \
            --statistics --extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503
      - name: Run black
        shell: bash
--- a/.github/workflows/test-ncnn-export.yml
+++ b/.github/workflows/test-ncnn-export.yml
@ -0,0 +1,75 @@
 name: test-ncnn-export
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: test_ncnn_export-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  test_ncnn_export:
    if: github.event.label.name == 'ready' || github.event.label.name == 'ncnn' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Test ncnn export
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/test-ncnn-export.sh
--- a/.github/workflows/test-onnx-export.yml
+++ b/.github/workflows/test-onnx-export.yml
@ -0,0 +1,75 @@
 name: test-onnx-export
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
 concurrency:
  group: test_onnx_export-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  test_onnx_export:
    if: github.event.label.name == 'ready' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2022-09-25
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Test ONNX export
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/test-onnx-export.sh
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -21,26 +21,23 @@ on:
    branches:
      - master
  pull_request:
-    types: [labeled]
+    branches:
      - master
 concurrency:
  group: test-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  test:
    if: github.event.label.name == 'ready' || github.event_name == 'push'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        # os: [ubuntu-18.04, macos-10.15]
+        os: [ubuntu-latest]
-        # disable macOS test for now.
+        python-version: ["3.8"]
-        os: [ubuntu-18.04]
+        torch: ["1.10.0"]
-        python-version: [3.7, 3.8]
+        torchaudio: ["0.10.0"]
-        torch: ["1.8.0", "1.11.0"]
+        k2-version: ["1.23.2.dev20221201"]
        torchaudio: ["0.8.0", "0.11.0"]
        k2-version: ["1.15.1.dev20220427"]
        exclude:
          - torch: "1.8.0"
            torchaudio: "0.11.0"
          - torch: "1.11.0"
            torchaudio: "0.8.0"
      fail-fast: false
@ -59,7 +56,7 @@ jobs:
        run: |
          sudo apt update
          sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
-          sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
+          sudo apt install -q -y --fix-missing libsox-dev libsox-fmt-all
      - name: Install Python dependencies
        run: |
@ -67,21 +64,16 @@ jobs:
          # numpy 1.20.x does not support python 3.6
          pip install numpy==1.19
          pip install torch==${{ matrix.torch }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
-          if [[ ${{ matrix.torchaudio }} == "0.11.0" ]]; then
+          pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
            pip install torchaudio==${{ matrix.torchaudio }}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
          else
            pip install torchaudio==${{ matrix.torchaudio }}
          fi
          pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/
          pip install git+https://github.com/lhotse-speech/lhotse
          # icefall requirements
          pip uninstall -y protobuf
-          pip install --no-binary protobuf protobuf
+          pip install --no-binary protobuf protobuf==3.20.*
          pip install kaldifst
          pip install onnxruntime
          pip install -r requirements.txt
      - name: Install graphviz
@ -121,19 +113,20 @@ jobs:
          cd ../pruned_transducer_stateless4
          pytest -v -s
          cd ../pruned_transducer_stateless7
          pytest -v -s
          cd ../transducer_stateless
          pytest -v -s
-          if [[ ${{ matrix.torchaudio }} == "0.10.0" ]]; then
+          # cd ../transducer
-            cd ../transducer
+          # pytest -v -s
            pytest -v -s
-            cd ../transducer_stateless2
+          cd ../transducer_stateless2
-            pytest -v -s
+          pytest -v -s
-            cd ../transducer_lstm
+          cd ../transducer_lstm
-            pytest -v -s
+          pytest -v -s
          fi
      - name: Run tests
        if: startsWith(matrix.os, 'macos')
@ -164,13 +157,11 @@ jobs:
          cd ../transducer_stateless
          pytest -v -s
-          if [[ ${{ matrix.torchaudio }} == "0.10.0" ]]; then
+          # cd ../transducer
-            cd ../transducer
+          # pytest -v -s
            pytest -v -s
-            cd ../transducer_stateless2
+          cd ../transducer_stateless2
-            pytest -v -s
+          pytest -v -s
-            cd ../transducer_lstm
+          cd ../transducer_lstm
-            pytest -v -s
+          pytest -v -s
          fi
--- a/.gitignore
+++ b/.gitignore
@ -11,5 +11,26 @@ log
 *.bak
 *-bak
 *bak.py
 # Ignore Mac system files
 .DS_store
 # Ignore node_modules folder
 node_modules
 # ignore .nfs
 .nfs*
 # Ignore all text files
 *.txt
 # Ignore files related to API keys
 .env
 # Ignore SASS config files
 .sass-cache
 *.param
 *.bin
 .DS_Store
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,26 +1,38 @@
 repos:
  - repo: https://github.com/psf/black
-    rev: 21.6b0
+    rev: 22.3.0
    hooks:
      - id: black
-        args: [--line-length=80]
+        args: ["--line-length=88"]
-        additional_dependencies: ['click==8.0.1']
+        additional_dependencies: ['click==8.1.0']
        exclude: icefall\/__init__\.py
  - repo: https://github.com/PyCQA/flake8
-    rev: 3.9.2
+    rev: 5.0.4
    hooks:
      - id: flake8
-        args: [--max-line-length=80]
+        args: ["--max-line-length=88", "--extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503"]
      # What are we ignoring here?
      # E203: whitespace before ':'
      # E266: too many leading '#' for block comment
      # E501: line too long
      # F401: module imported but unused
      # E402: module level import not at top of file
      # F403: 'from module import *' used; unable to detect undefined names
      # F841: local variable is assigned to but never used
      # W503: line break before binary operator
      # In addition, the default ignore list is:
      # E121,E123,E126,E226,E24,E704,W503,W504
  - repo: https://github.com/pycqa/isort
-    rev: 5.9.2
+    rev: 5.10.1
    hooks:
      - id: isort
-        args: [--profile=black, --line-length=80]
+        args: ["--profile=black"]
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
+    rev: v4.2.0
    hooks:
      - id: check-executables-have-shebangs
      - id: end-of-file-fixer
--- a/9
+++ b/9
@ -1,13 +1,4 @@
                                 Legal Notices
   NOTE (this is not from the Apache License): The copyright model is that
   authors (or their employers, if noted in individual files) own their
   individual contributions. The authors' contributions can be discerned
   from the git history.
 -------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
--- a/docker/README.md
+++ b/docker/README.md
@ -2,7 +2,7 @@
 2 sets of configuration are provided - (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8, and (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
-If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8. 
+If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8.
 Otherwise, since the older PyTorch images are not updated with the [apt-key rotation by NVIDIA](https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key), you have to go for case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8. Ensure that your NVIDIA driver supports at least CUDA 11.0.
@ -10,7 +10,7 @@ You can check the highest CUDA version within your NVIDIA driver's support with
 ```bash
 $ nvidia-smi
-Tue Sep 20 00:26:13 2022       
+Tue Sep 20 00:26:13 2022
 +-----------------------------------------------------------------------------+
 | NVIDIA-SMI 450.119.03   Driver Version: 450.119.03   CUDA Version: 11.0     |
 |-------------------------------+----------------------+----------------------+
@ -26,7 +26,7 @@ Tue Sep 20 00:26:13 2022
 | 41%   30C    P8    11W / 280W |      6MiB / 24220MiB |      0%      Default |
 |                               |                      |                  N/A |
 +-------------------------------+----------------------+----------------------+
-                                                                               
+
 +-----------------------------------------------------------------------------+
 | Processes:                                                                  |
 |  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
@ -40,15 +40,15 @@ Tue Sep 20 00:26:13 2022
 ```
 ## Building images locally
-If your environment requires a proxy to access the Internet, remember to add those information into the Dockerfile directly. 
+If your environment requires a proxy to access the Internet, remember to add those information into the Dockerfile directly.
-For most cases, you can uncomment these lines in the Dockerfile and add in your proxy details. 
+For most cases, you can uncomment these lines in the Dockerfile and add in your proxy details.
 ```dockerfile
 ENV http_proxy=http://aaa.bb.cc.net:8080 \
    https_proxy=http://aaa.bb.cc.net:8080
 ```
-Then, proceed with these commands. 
+Then, proceed with these commands.
 ### If you are case (a), i.e. your NVIDIA driver supports CUDA version >= 11.3:
@ -72,11 +72,11 @@ docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all icefall
 ```
 ### Tips:
-1. Since your data and models most probably won't be in the docker, you must use the -v flag to access the host machine. Do this by specifying `-v {/path/in/host/machine}:{/path/in/docker}`. 
+1. Since your data and models most probably won't be in the docker, you must use the -v flag to access the host machine. Do this by specifying `-v {/path/in/host/machine}:{/path/in/docker}`.
 2. Also, if your environment requires a proxy, this would be a good time to add it in too: `-e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080`.
-Overall, your docker run command should look like this. 
+Overall, your docker run command should look like this.
 ```bash
 docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all -v {/path/in/host/machine}:{/path/in/docker} -e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080 icefall/pytorch1.12.1
@ -86,9 +86,9 @@ You can explore more docker run options [here](https://docs.docker.com/engine/re
 ### Linking to icefall in your host machine
-If you already have icefall downloaded onto your host machine, you can use that repository instead so that changes in your code are visible inside and outside of the container. 
+If you already have icefall downloaded onto your host machine, you can use that repository instead so that changes in your code are visible inside and outside of the container.
-Note: Remember to set the -v flag above during the first run of the container, as that is the only way for your container to access your host machine. 
+Note: Remember to set the -v flag above during the first run of the container, as that is the only way for your container to access your host machine.
 Warning: Check that the icefall in your host machine is visible from within your container before proceeding to the commands below.
 Use these commands once you are inside the container.
@ -103,7 +103,7 @@ ln -s {/path/in/docker/to/icefall} /workspace/icefall
 docker exec -it icefall /bin/bash
 ```
-## Restarting a killed container that has been run before. 
+## Restarting a killed container that has been run before.
 ```bash
 docker start -ai icefall
 ```
@ -111,4 +111,4 @@ docker start -ai icefall
 ## Sample usage of the CPU based images:
 ```bash
 docker run -it icefall /bin/bash
-``` 
+```
--- a/docker/Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8/Dockerfile
+++ b/docker/Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8/Dockerfile
@ -1,7 +1,7 @@
 FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
 # ENV http_proxy=http://aaa.bbb.cc.net:8080 \
-#	https_proxy=http://aaa.bbb.cc.net:8080 
+#	https_proxy=http://aaa.bbb.cc.net:8080
 # install normal source
 RUN apt-get update && \
@ -38,10 +38,10 @@ RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
    rm -rf cmake-3.18.0.tar.gz && \
    find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
    cd -
-	
+
-# flac 
+# flac
 RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz  && \
-    cd /opt && \ 
+    cd /opt && \
    xz -d flac-1.3.2.tar.xz && \
    tar -xvf flac-1.3.2.tar && \
    cd flac-1.3.2 && \
@ -49,11 +49,11 @@ RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz  &&
    make && make install && \
    rm -rf flac-1.3.2.tar && \
    find /opt/flac-1.3.2  -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
-    cd - 
+    cd -
 RUN conda install -y -c pytorch torchaudio=0.12 && \
    pip install graphviz
-	
+
 #install k2 from source
 RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
@ -68,6 +68,7 @@ RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
 	cd /workspace/icefall && \
 	pip install -r requirements.txt
 RUN pip install kaldifeat
 ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
-WORKDIR /workspace/icefall
+WORKDIR /workspace/icefall
--- a/docker/Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8/Dockerfile
+++ b/docker/Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8/Dockerfile
@ -1,12 +1,12 @@
 FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel
 # ENV http_proxy=http://aaa.bbb.cc.net:8080 \
-#	https_proxy=http://aaa.bbb.cc.net:8080 
+#	https_proxy=http://aaa.bbb.cc.net:8080
 RUN rm /etc/apt/sources.list.d/cuda.list && \
 	rm /etc/apt/sources.list.d/nvidia-ml.list && \
 	apt-key del 7fa2af80
-	
+
 # install normal source
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
@ -36,7 +36,7 @@ RUN curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu18
 	curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
 	echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
 	echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
-	rm -rf /var/lib/apt/lists/* && \ 
+	rm -rf /var/lib/apt/lists/* && \
 	mv /opt/conda/lib/libcufft.so.10 /opt/libcufft.so.10.bak && \
    mv /opt/conda/lib/libcurand.so.10 /opt/libcurand.so.10.bak && \
    mv /opt/conda/lib/libcublas.so.11 /opt/libcublas.so.11.bak && \
@ -56,10 +56,10 @@ RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
    rm -rf cmake-3.18.0.tar.gz && \
    find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
    cd -
-	
+
-# flac 
+# flac
 RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz  && \
-    cd /opt && \ 
+    cd /opt && \
    xz -d flac-1.3.2.tar.xz && \
    tar -xvf flac-1.3.2.tar && \
    cd flac-1.3.2 && \
@ -67,7 +67,7 @@ RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz  &&
    make && make install && \
    rm -rf flac-1.3.2.tar && \
    find /opt/flac-1.3.2  -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
-    cd - 
+    cd -
 RUN conda install -y -c pytorch torchaudio=0.7.1 && \
    pip install graphviz
@ -79,7 +79,7 @@ RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
    cd -
 # install  lhotse
-RUN pip install git+https://github.com/lhotse-speech/lhotse 
+RUN pip install git+https://github.com/lhotse-speech/lhotse
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
 	cd /workspace/icefall && \
@ -88,4 +88,3 @@ RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
 ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docs/README.md
+++ b/docs/README.md
@ -0,0 +1,24 @@
 ## Usage
 ```bash
 cd /path/to/icefall/docs
 pip install -r requirements.txt
 make clean
 make html
 cd build/html
 python3 -m http.server 8000
 ```
 It prints:
 ```
 Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
 ```
 Open your browser and go to <http://0.0.0.0:8000/> to view the generated
 documentation.
 Done!
 **Hint**: You can change the port number when starting the server.
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -78,3 +78,15 @@ html_context = {
 }
 todo_include_todos = True
 rst_epilog = """
 .. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn
 .. _sherpa-onnx: https://github.com/k2-fsa/sherpa-onnx
 .. _icefall: https://github.com/k2-fsa/icefall
 .. _git-lfs: https://git-lfs.com/
 .. _ncnn: https://github.com/tencent/ncnn
 .. _LibriSpeech: https://www.openslr.org/12
 .. _musan: http://www.openslr.org/17/
 .. _ONNX: https://github.com/onnx/onnx
 .. _onnxruntime: https://github.com/microsoft/onnxruntime
 """
--- a/docs/source/contributing/code-style.rst
+++ b/docs/source/contributing/code-style.rst
@ -11,9 +11,9 @@ We use the following tools to make the code style to be as consistent as possibl
 The following versions of the above tools are used:
-  - ``black == 12.6b0``
+  - ``black == 22.3.0``
-  - ``flake8 == 3.9.2``
+  - ``flake8 == 5.0.4``
-  - ``isort == 5.9.2``
+  - ``isort == 5.10.1``
 After running the following commands:
@ -54,10 +54,17 @@ it should succeed this time:
 If you want to check the style of your code before ``git commit``, you
 can do the following:
  .. code-block:: bash
    $ pre-commit install
    $ pre-commit run
 Or without installing the pre-commit hooks:
  .. code-block:: bash
    $ cd icefall
-    $ pip install black==21.6b0 flake8==3.9.2 isort==5.9.2
+    $ pip install black==22.3.0 flake8==5.0.4 isort==5.10.1
    $ black --check your_changed_file.py
    $ black your_changed_file.py  # modify it in-place
    $
--- a/docs/source/faqs.rst
+++ b/docs/source/faqs.rst
@ -0,0 +1,107 @@
 Frequently Asked Questions (FAQs)
 =================================
 In this section, we collect issues reported by users and post the corresponding
 solutions.
 OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
 -----------------------------------------------------------------------------------
 One user is using the following code to install ``torch`` and ``torchaudio``:
 .. code-block:: bash
  pip install \
    torch==1.10.0+cu111 \
    torchvision==0.11.0+cu111 \
    torchaudio==0.10.0 \
    -f https://download.pytorch.org/whl/torch_stable.html
 and it throws the following error when running ``tdnn/train.py``:
 .. code-block::
  OSError: libtorch_hip.so: cannot open shared object file: no such file or directory
 The fix is to specify the CUDA version while installing ``torchaudio``. That
 is, change ``torchaudio==0.10.0`` to ``torchaudio==0.10.0+cu111``. Therefore,
 the correct command is:
 .. code-block:: bash
  pip install \
    torch==1.10.0+cu111 \
    torchvision==0.11.0+cu111 \
    torchaudio==0.10.0+cu111 \
    -f https://download.pytorch.org/whl/torch_stable.html
 AttributeError: module 'distutils' has no attribute 'version'
 -------------------------------------------------------------
 The error log is:
 .. code-block::
  Traceback (most recent call last):
    File "./tdnn/train.py", line 14, in <module>
      from asr_datamodule import YesNoAsrDataModule
    File "/home/xxx/code/next-gen-kaldi/icefall/egs/yesno/ASR/tdnn/asr_datamodule.py", line 34, in <module>
      from icefall.dataset.datamodule import DataModule
    File "/home/xxx/code/next-gen-kaldi/icefall/icefall/__init__.py", line 3, in <module>
      from . import (
    File "/home/xxx/code/next-gen-kaldi/icefall/icefall/decode.py", line 23, in <module>
      from icefall.utils import add_eos, add_sos, get_texts
    File "/home/xxx/code/next-gen-kaldi/icefall/icefall/utils.py", line 39, in <module>
      from torch.utils.tensorboard import SummaryWriter
    File "/home/xxx/tool/miniconda3/envs/yyy/lib/python3.8/site-packages/torch/utils/tensorboard/__init__.py", line 4, in <module>
      LooseVersion = distutils.version.LooseVersion
  AttributeError: module 'distutils' has no attribute 'version'
 The fix is:
 .. code-block:: bash
  pip uninstall setuptools
  pip install setuptools==58.0.4
 ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
 --------------------------------------------------------------------------------------------
 If you are using ``conda`` and encounter the following issue:
 .. code-block::
  Traceback (most recent call last):
    File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 24, in <module>
      from _k2 import DeterminizeWeightPushingType
  ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
  During handling of the above exception, another exception occurred:
  Traceback (most recent call last):
    File "/k2-dev/yangyifan/icefall/egs/librispeech/ASR/./pruned_transducer_stateless7_ctc_bs/decode.py", line 104, in <module>
      import k2
    File "/k2-dev/yangyifan/anaconda3/envs/icefall/lib/python3.10/site-packages/k2-1.23.3.dev20230112+cuda11.6.torch1.13.1-py3.10-linux-x86_64.egg/k2/__init__.py", line 30, in <module>
      raise ImportError(
  ImportError: libpython3.10.so.1.0: cannot open shared object file: No such file or directory
  Note: If you're using anaconda and importing k2 on MacOS,
        you can probably fix this by setting the environment variable:
    export DYLD_LIBRARY_PATH=$CONDA_PREFIX/lib/python3.10/site-packages:$DYLD_LIBRARY_PATH
 Please first try to find where ``libpython3.10.so.1.0`` is located.
 For instance,
 .. code-block:: bash
  cd $CONDA_PREFIX/lib
  find . -name "libpython*"
 If you are able to find it inside ``$CONDA_PREFIX/lib``, please set the
 following environment variable:
 .. code-block:: bash
  export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@ -21,7 +21,16 @@ speech recognition recipes using `k2 <https://github.com/k2-fsa/k2>`_.
   :caption: Contents:
   installation/index
   faqs
   model-export/index
 .. toctree::
   :maxdepth: 3
   recipes/index
 .. toctree::
   :maxdepth: 2
   contributing/index
   huggingface/index
--- a/docs/source/installation/images/k2-gt-v1.9-blueviolet.svg
+++ b/docs/source/installation/images/k2-gt-v1.9-blueviolet.svg
@ -1 +1 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: &gt;= v1.9"><title>k2: &gt;= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">&gt;= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">&gt;= v1.9</text></g></svg>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: &gt;= v1.9"><title>k2: &gt;= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">&gt;= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">&gt;= v1.9</text></g></svg>
--- a/docs/source/installation/images/python-gt-v3.6-blue.svg
+++ b/docs/source/installation/images/python-gt-v3.6-blue.svg
@ -1 +1 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: &gt;= 3.6"><title>python: &gt;= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">&gt;= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">&gt;= 3.6</text></g></svg>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="python: &gt;= 3.6"><title>python: &gt;= 3.6</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="98" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="49" height="20" fill="#555"/><rect x="49" width="49" height="20" fill="#007ec6"/><rect width="98" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="255" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">python</text><text x="255" y="140" transform="scale(.1)" fill="#fff" textLength="390">python</text><text aria-hidden="true" x="725" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="390">&gt;= 3.6</text><text x="725" y="140" transform="scale(.1)" fill="#fff" textLength="390">&gt;= 3.6</text></g></svg>
--- a/docs/source/installation/images/torch-gt-v1.6.0-green.svg
+++ b/docs/source/installation/images/torch-gt-v1.6.0-green.svg
@ -1 +1 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100" height="20" role="img" aria-label="torch: &gt;= 1.6.0"><title>torch: &gt;= 1.6.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="100" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="39" height="20" fill="#555"/><rect x="39" width="61" height="20" fill="#97ca00"/><rect width="100" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="205" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="290">torch</text><text x="205" y="140" transform="scale(.1)" fill="#fff" textLength="290">torch</text><text aria-hidden="true" x="685" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">&gt;= 1.6.0</text><text x="685" y="140" transform="scale(.1)" fill="#fff" textLength="510">&gt;= 1.6.0</text></g></svg>
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100" height="20" role="img" aria-label="torch: &gt;= 1.6.0"><title>torch: &gt;= 1.6.0</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="100" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="39" height="20" fill="#555"/><rect x="39" width="61" height="20" fill="#97ca00"/><rect width="100" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="205" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="290">torch</text><text x="205" y="140" transform="scale(.1)" fill="#fff" textLength="290">torch</text><text aria-hidden="true" x="685" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">&gt;= 1.6.0</text><text x="685" y="140" transform="scale(.1)" fill="#fff" textLength="510">&gt;= 1.6.0</text></g></svg>
--- a/docs/source/installation/index.rst
+++ b/docs/source/installation/index.rst
@ -393,6 +393,17 @@ Now let us run the training part:
  We use ``export CUDA_VISIBLE_DEVICES=""`` so that ``icefall`` uses CPU
  even if there are GPUs available.
 .. hint::
   In case you get a ``Segmentation fault (core dumped)`` error, please use:
      .. code-block:: bash
        export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
   See more at `<https://github.com/k2-fsa/icefall/issues/674>`_ if you are
   interested.
 The training log is given below:
 .. code-block::
--- a/docs/source/model-export/code/export-conv-emformer-transducer-for-ncnn-output.txt
+++ b/docs/source/model-export/code/export-conv-emformer-transducer-for-ncnn-output.txt
@ -0,0 +1,21 @@
 2023-01-11 12:15:38,677 INFO [export-for-ncnn.py:220] device: cpu
 2023-01-11 12:15:38,681 INFO [export-for-ncnn.py:229] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_v
 alid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampl
 ing_factor': 4, 'decoder_dim': 512, 'joiner_dim': 512, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.2', 'k2-build-type':
 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'a34171ed85605b0926eebbd0463d059431f4f74a', 'k2-git-date': 'Wed Dec 14 00:06:38 2022',
 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False, 'torch-cuda-vers
 ion': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'fix-stateless3-train-2022-12-27', 'icefall-git-sha1': '530e8a1-dirty', '
 icefall-git-date': 'Tue Dec 27 13:59:18 2022', 'icefall-path': '/star-fj/fangjun/open-source/icefall', 'k2-path': '/star-fj/fangjun/op
 en-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279
 -k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '127.0.0.1'}, 'epoch': 30, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefa
 ll-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp'), 'bpe_model': './icefall-asr-librispeech-conv-emformer-transdu
 cer-stateless2-2022-07-05//data/lang_bpe_500/bpe.model', 'jit': False, 'context_size': 2, 'use_averaged_model': False, 'encoder_dim':
 512, 'nhead': 8, 'dim_feedforward': 2048, 'num_encoder_layers': 12, 'cnn_module_kernel': 31, 'left_context_length': 32, 'chunk_length'
 : 32, 'right_context_length': 8, 'memory_size': 32, 'blank_id': 0, 'vocab_size': 500}
 2023-01-11 12:15:38,681 INFO [export-for-ncnn.py:231] About to create model
 2023-01-11 12:15:40,053 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-conv-emformer-transducer-stateless2-2
 022-07-05/exp/epoch-30.pt
 2023-01-11 12:15:40,708 INFO [export-for-ncnn.py:315] Number of model parameters: 75490012
 2023-01-11 12:15:41,681 INFO [export-for-ncnn.py:318] Using torch.jit.trace()
 2023-01-11 12:15:41,681 INFO [export-for-ncnn.py:320] Exporting encoder
 2023-01-11 12:15:41,682 INFO [export-for-ncnn.py:149] chunk_length: 32, right_context_length: 8
--- a/docs/source/model-export/code/export-lstm-transducer-for-ncnn-output.txt
+++ b/docs/source/model-export/code/export-lstm-transducer-for-ncnn-output.txt
@ -0,0 +1,18 @@
 2023-02-17 11:22:42,862 INFO [export-for-ncnn.py:222] device: cpu
 2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:231] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'dim_feedforward': 2048, 'decoder_dim': 512, 'joiner_dim': 512, 'is_pnnx': False, 'model_warm_step': 3000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': False, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-dirty', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp'), 'bpe_model': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': 12, 'encoder_dim': 512, 'rnn_hidden_size': 1024, 'aux_layer_period': 0, 'blank_id': 0, 'vocab_size': 500}
 2023-02-17 11:22:42,865 INFO [export-for-ncnn.py:235] About to create model
 2023-02-17 11:22:43,239 INFO [train.py:472] Disable giga
 2023-02-17 11:22:43,249 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/epoch-99.pt
 2023-02-17 11:22:44,595 INFO [export-for-ncnn.py:324] encoder parameters: 83137520
 2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:325] decoder parameters: 257024
 2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:326] joiner parameters: 781812
 2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:327] total parameters: 84176356
 2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:329] Using torch.jit.trace()
 2023-02-17 11:22:44,596 INFO [export-for-ncnn.py:331] Exporting encoder
 2023-02-17 11:22:48,182 INFO [export-for-ncnn.py:158] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
 2023-02-17 11:22:48,183 INFO [export-for-ncnn.py:335] Exporting decoder
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/lstm_transducer_stateless2/decoder.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  need_pad = bool(need_pad)
 2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:180] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
 2023-02-17 11:22:48,259 INFO [export-for-ncnn.py:339] Exporting joiner
 2023-02-17 11:22:48,304 INFO [export-for-ncnn.py:207] Saved to icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
--- a/docs/source/model-export/code/export-zipformer-transducer-for-ncnn-output.txt
+++ b/docs/source/model-export/code/export-zipformer-transducer-for-ncnn-output.txt
@ -0,0 +1,74 @@
 2023-02-27 20:23:07,473 INFO [export-for-ncnn.py:246] device: cpu
 2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:255] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.23.4', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '62e404dd3f3a811d73e424199b3408e309c06e1a', 'k2-git-date': 'Mon Jan 30 10:26:16 2023', 'lhotse-version': '1.12.0.dev+missing.version.file', 'torch-version': '1.10.0+cu102', 'torch-cuda-available': True, 'torch-cuda-version': '10.2', 'python-version': '3.8', 'icefall-git-branch': 'master', 'icefall-git-sha1': '6d7a559-clean', 'icefall-git-date': 'Thu Feb 16 19:47:54 2023', 'icefall-path': '/star-fj/fangjun/open-source/icefall-2', 'k2-path': '/star-fj/fangjun/open-source/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-fj/fangjun/open-source/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-3-1220120619-7695ff496b-s9n4w', 'IP address': '10.177.6.147'}, 'epoch': 99, 'iter': 0, 'avg': 1, 'exp_dir': PosixPath('icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp'), 'bpe_model': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model', 'context_size': 2, 'use_averaged_model': False, 'num_encoder_layers': '2,4,3,2,4', 'feedforward_dims': '1024,1024,2048,2048,1024', 'nhead': '8,8,8,8,8', 'encoder_dims': '384,384,384,384,384', 'attention_dims': '192,192,192,192,192', 'encoder_unmasked_dims': '256,256,256,256,256', 'zipformer_downsampling_factors': '1,2,4,8,2', 'cnn_module_kernels': '31,31,31,31,31', 'decoder_dim': 512, 'joiner_dim': 512, 'short_chunk_size': 50, 'num_left_chunks': 4, 'decode_chunk_len': 32, 'blank_id': 0, 'vocab_size': 500}
 2023-02-27 20:23:07,477 INFO [export-for-ncnn.py:257] About to create model
 2023-02-27 20:23:08,023 INFO [zipformer2.py:419] At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
 2023-02-27 20:23:08,037 INFO [checkpoint.py:112] Loading checkpoint from icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/epoch-99.pt
 2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:346] encoder parameters: 68944004
 2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:347] decoder parameters: 260096
 2023-02-27 20:23:08,655 INFO [export-for-ncnn.py:348] joiner parameters: 716276
 2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:349] total parameters: 69920376
 2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:351] Using torch.jit.trace()
 2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:353] Exporting encoder
 2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:174] decode_chunk_len: 32
 2023-02-27 20:23:08,656 INFO [export-for-ncnn.py:175] T: 39
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1344: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_len.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1348: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_avg.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1352: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1356: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1360: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val2.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1364: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_conv1.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1368: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_conv2.size(0) == self.num_layers, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1373: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert self.left_context_len == cached_key.shape[1], (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1884: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert self.x_size == x.size(0), (self.x_size, x.size(0))
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2442: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.shape[0] == self.left_context_len, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2449: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.shape[0] == cached_val.shape[0], (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2469: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_key.shape[0] == left_context_len, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2473: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val.shape[0] == left_context_len, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2483: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert kv_len == k.shape[0], (kv_len, k.shape)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2570: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert list(attn_output.size()) == [bsz * num_heads, seq_len, head_dim // 2]
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2926: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cache.shape == (x.size(0), x.size(1), self.lorder), (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2652: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert x.shape[0] == self.x_size, (x.shape[0], self.x_size)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2653: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert x.shape[2] == self.embed_dim, (x.shape[2], self.embed_dim)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:2666: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert cached_val.shape[0] == self.left_context_len, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1543: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src.shape[0] == self.in_x_size, (src.shape[0], self.in_x_size)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1637: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src.shape[0] == self.in_x_size, (
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1643: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src.shape[2] == self.in_channels, (src.shape[2], self.in_channels)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1571: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if src.shape[0] != self.in_x_size:
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1763: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src1.shape[:-1] == src2.shape[:-1], (src1.shape, src2.shape)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1779: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src1.shape[-1] == self.dim1, (src1.shape[-1], self.dim1)
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/zipformer2.py:1780: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert src2.shape[-1] == self.dim2, (src2.shape[-1], self.dim2)
 /star-fj/fangjun/py38/lib/python3.8/site-packages/torch/jit/_trace.py:958: TracerWarning: Encountering a list at the output of the tracer might cause the trace to be incorrect, this is only valid if the container structure does not change based on the module's inputs. Consider using a constant container instead (e.g. for `list`, use a `tuple` instead. for `dict`, use a `NamedTuple` instead). If you absolutely need this and know the side effects, pass strict=False to trace() to allow this behavior.
  module._c._create_method_from_trace(
 2023-02-27 20:23:19,640 INFO [export-for-ncnn.py:182] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
 2023-02-27 20:23:19,646 INFO [export-for-ncnn.py:357] Exporting decoder
 /star-fj/fangjun/open-source/icefall-2/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/decoder.py:102: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  assert embedding_out.size(-1) == self.context_size
 2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:204] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
 2023-02-27 20:23:19,686 INFO [export-for-ncnn.py:361] Exporting joiner
 2023-02-27 20:23:19,735 INFO [export-for-ncnn.py:231] Saved to icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt
--- a/docs/source/model-export/code/generate-int-8-scale-table-for-conv-emformer.txt
+++ b/docs/source/model-export/code/generate-int-8-scale-table-for-conv-emformer.txt
@ -0,0 +1,104 @@
 Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
 num encoder conv layers: 88
 num joiner conv layers: 3
 num files: 3
 Processing ../test_wavs/1089-134686-0001.wav
 Processing ../test_wavs/1221-135766-0001.wav
 Processing ../test_wavs/1221-135766-0002.wav
 Processing ../test_wavs/1089-134686-0001.wav
 Processing ../test_wavs/1221-135766-0001.wav
 Processing ../test_wavs/1221-135766-0002.wav
 ----------encoder----------
 conv_87                                  : max = 15.942385        threshold = 15.938493        scale = 7.968131
 conv_88                                  : max = 35.442448        threshold = 15.549335        scale = 8.167552
 conv_89                                  : max = 23.228289        threshold = 8.001738         scale = 15.871552
 linear_90                                : max = 3.976146         threshold = 1.101789         scale = 115.267128
 linear_91                                : max = 6.962030         threshold = 5.162033         scale = 24.602713
 linear_92                                : max = 12.323041        threshold = 3.853959         scale = 32.953129
 linear_94                                : max = 6.905416         threshold = 4.648006         scale = 27.323545
 linear_93                                : max = 6.905416         threshold = 5.474093         scale = 23.200188
 linear_95                                : max = 1.888012         threshold = 1.403563         scale = 90.483986
 linear_96                                : max = 6.856741         threshold = 5.398679         scale = 23.524273
 linear_97                                : max = 9.635942         threshold = 2.613655         scale = 48.590950
 linear_98                                : max = 6.460340         threshold = 5.670146         scale = 22.398010
 linear_99                                : max = 9.532276         threshold = 2.585537         scale = 49.119396
 linear_101                               : max = 6.585871         threshold = 5.719224         scale = 22.205809
 linear_100                               : max = 6.585871         threshold = 5.751382         scale = 22.081648
 linear_102                               : max = 1.593344         threshold = 1.450581         scale = 87.551147
 linear_103                               : max = 6.592681         threshold = 5.705824         scale = 22.257959
 linear_104                               : max = 8.752957         threshold = 1.980955         scale = 64.110489
 linear_105                               : max = 6.696240         threshold = 5.877193         scale = 21.608953
 linear_106                               : max = 9.059659         threshold = 2.643138         scale = 48.048950
 linear_108                               : max = 6.975461         threshold = 4.589567         scale = 27.671457
 linear_107                               : max = 6.975461         threshold = 6.190381         scale = 20.515701
 linear_109                               : max = 3.710759         threshold = 2.305635         scale = 55.082436
 linear_110                               : max = 7.531228         threshold = 5.731162         scale = 22.159557
 linear_111                               : max = 10.528083        threshold = 2.259322         scale = 56.211544
 linear_112                               : max = 8.148807         threshold = 5.500842         scale = 23.087374
 linear_113                               : max = 8.592566         threshold = 1.948851         scale = 65.166611
 linear_115                               : max = 8.437109         threshold = 5.608947         scale = 22.642395
 linear_114                               : max = 8.437109         threshold = 6.193942         scale = 20.503904
 linear_116                               : max = 3.966980         threshold = 3.200896         scale = 39.676392
 linear_117                               : max = 9.451303         threshold = 6.061664         scale = 20.951344
 linear_118                               : max = 12.077262        threshold = 3.965800         scale = 32.023804
 linear_119                               : max = 9.671615         threshold = 4.847613         scale = 26.198460
 linear_120                               : max = 8.625638         threshold = 3.131427         scale = 40.556595
 linear_122                               : max = 10.274080        threshold = 4.888716         scale = 25.978189
 linear_121                               : max = 10.274080        threshold = 5.420480         scale = 23.429659
 linear_123                               : max = 4.826197         threshold = 3.599617         scale = 35.281532
 linear_124                               : max = 11.396383        threshold = 7.325849         scale = 17.335875
 linear_125                               : max = 9.337198         threshold = 3.941410         scale = 32.221970
 linear_126                               : max = 9.699965         threshold = 4.842878         scale = 26.224073
 linear_127                               : max = 8.775370         threshold = 3.884215         scale = 32.696438
 linear_129                               : max = 9.872276         threshold = 4.837319         scale = 26.254213
 linear_128                               : max = 9.872276         threshold = 7.180057         scale = 17.687883
 linear_130                               : max = 4.150427         threshold = 3.454298         scale = 36.765789
 linear_131                               : max = 11.112692        threshold = 7.924847         scale = 16.025545
 linear_132                               : max = 11.852893        threshold = 3.116593         scale = 40.749626
 linear_133                               : max = 11.517084        threshold = 5.024665         scale = 25.275314
 linear_134                               : max = 10.683807        threshold = 3.878618         scale = 32.743618
 linear_136                               : max = 12.421055        threshold = 6.322729         scale = 20.086264
 linear_135                               : max = 12.421055        threshold = 5.309880         scale = 23.917679
 linear_137                               : max = 4.827781         threshold = 3.744595         scale = 33.915554
 linear_138                               : max = 14.422395        threshold = 7.742882         scale = 16.402161
 linear_139                               : max = 8.527538         threshold = 3.866123         scale = 32.849449
 linear_140                               : max = 12.128619        threshold = 4.657793         scale = 27.266134
 linear_141                               : max = 9.839593         threshold = 3.845993         scale = 33.021378
 linear_143                               : max = 12.442304        threshold = 7.099039         scale = 17.889746
 linear_142                               : max = 12.442304        threshold = 5.325038         scale = 23.849592
 linear_144                               : max = 5.929444         threshold = 5.618206         scale = 22.605080
 linear_145                               : max = 13.382126        threshold = 9.321095         scale = 13.625010
 linear_146                               : max = 9.894987         threshold = 3.867645         scale = 32.836517
 linear_147                               : max = 10.915313        threshold = 4.906028         scale = 25.886522
 linear_148                               : max = 9.614287         threshold = 3.908151         scale = 32.496181
 linear_150                               : max = 11.724932        threshold = 4.485588         scale = 28.312899
 linear_149                               : max = 11.724932        threshold = 5.161146         scale = 24.606939
 linear_151                               : max = 7.164453         threshold = 5.847355         scale = 21.719223
 linear_152                               : max = 13.086471        threshold = 5.984121         scale = 21.222834
 linear_153                               : max = 11.099524        threshold = 3.991601         scale = 31.816805
 linear_154                               : max = 10.054585        threshold = 4.489706         scale = 28.286930
 linear_155                               : max = 12.389185        threshold = 3.100321         scale = 40.963501
 linear_157                               : max = 9.982999         threshold = 5.154796         scale = 24.637253
 linear_156                               : max = 9.982999         threshold = 8.537706         scale = 14.875190
 linear_158                               : max = 8.420287         threshold = 6.502287         scale = 19.531588
 linear_159                               : max = 25.014746        threshold = 9.423280         scale = 13.477261
 linear_160                               : max = 45.633553        threshold = 5.715335         scale = 22.220921
 linear_161                               : max = 20.371849        threshold = 5.117830         scale = 24.815203
 linear_162                               : max = 12.492933        threshold = 3.126283         scale = 40.623318
 linear_164                               : max = 20.697504        threshold = 4.825712         scale = 26.317358
 linear_163                               : max = 20.697504        threshold = 5.078367         scale = 25.008038
 linear_165                               : max = 9.023975         threshold = 6.836278         scale = 18.577358
 linear_166                               : max = 34.860619        threshold = 7.259792         scale = 17.493614
 linear_167                               : max = 30.380934        threshold = 5.496160         scale = 23.107042
 linear_168                               : max = 20.691216        threshold = 4.733317         scale = 26.831076
 linear_169                               : max = 9.723948         threshold = 3.952728         scale = 32.129707
 linear_171                               : max = 21.034811        threshold = 5.366547         scale = 23.665123
 linear_170                               : max = 21.034811        threshold = 5.356277         scale = 23.710501
 linear_172                               : max = 10.556884        threshold = 5.729481         scale = 22.166058
 linear_173                               : max = 20.033039        threshold = 10.207264        scale = 12.442120
 linear_174                               : max = 11.597379        threshold = 2.658676         scale = 47.768131
 ----------joiner----------
 linear_2                                 : max = 19.293503        threshold = 14.305265        scale = 8.877850
 linear_1                                 : max = 10.812222        threshold = 8.766452         scale = 14.487047
 linear_3                                 : max = 0.999999         threshold = 0.999755         scale = 127.031174
 ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...
--- a/docs/source/model-export/code/generate-int-8-scale-table-for-lstm.txt
+++ b/docs/source/model-export/code/generate-int-8-scale-table-for-lstm.txt
@ -0,0 +1,44 @@
 Don't Use GPU. has_gpu: 0, config.use_vulkan_compute: 1
 num encoder conv layers: 28
 num joiner conv layers: 3
 num files: 3
 Processing ../test_wavs/1089-134686-0001.wav
 Processing ../test_wavs/1221-135766-0001.wav
 Processing ../test_wavs/1221-135766-0002.wav
 Processing ../test_wavs/1089-134686-0001.wav
 Processing ../test_wavs/1221-135766-0001.wav
 Processing ../test_wavs/1221-135766-0002.wav
 ----------encoder----------
 conv_15                                  : max = 15.942385        threshold = 15.930708        scale = 7.972025
 conv_16                                  : max = 44.978855        threshold = 17.031788        scale = 7.456645
 conv_17                                  : max = 17.868437        threshold = 7.830528         scale = 16.218575
 linear_18                                : max = 3.107259         threshold = 1.194808         scale = 106.293236
 linear_19                                : max = 6.193777         threshold = 4.634748         scale = 27.401705
 linear_20                                : max = 9.259933         threshold = 2.606617         scale = 48.722160
 linear_21                                : max = 5.186600         threshold = 4.790260         scale = 26.512129
 linear_22                                : max = 9.759041         threshold = 2.265832         scale = 56.050053
 linear_23                                : max = 3.931209         threshold = 3.099090         scale = 40.979767
 linear_24                                : max = 10.324160        threshold = 2.215561         scale = 57.321835
 linear_25                                : max = 3.800708         threshold = 3.599352         scale = 35.284134
 linear_26                                : max = 10.492444        threshold = 3.153369         scale = 40.274391
 linear_27                                : max = 3.660161         threshold = 2.720994         scale = 46.674126
 linear_28                                : max = 9.415265         threshold = 3.174434         scale = 40.007133
 linear_29                                : max = 4.038418         threshold = 3.118534         scale = 40.724262
 linear_30                                : max = 10.072084        threshold = 3.936867         scale = 32.259155
 linear_31                                : max = 4.342712         threshold = 3.599489         scale = 35.282787
 linear_32                                : max = 11.340535        threshold = 3.120308         scale = 40.701103
 linear_33                                : max = 3.846987         threshold = 3.630030         scale = 34.985939
 linear_34                                : max = 10.686298        threshold = 2.204571         scale = 57.607586
 linear_35                                : max = 4.904821         threshold = 4.575518         scale = 27.756420
 linear_36                                : max = 11.806659        threshold = 2.585589         scale = 49.118401
 linear_37                                : max = 6.402340         threshold = 5.047157         scale = 25.162680
 linear_38                                : max = 11.174589        threshold = 1.923361         scale = 66.030258
 linear_39                                : max = 16.178576        threshold = 7.556058         scale = 16.807705
 linear_40                                : max = 12.901954        threshold = 5.301267         scale = 23.956539
 linear_41                                : max = 14.839805        threshold = 7.597429         scale = 16.716181
 linear_42                                : max = 10.178945        threshold = 2.651595         scale = 47.895699
 ----------joiner----------
 linear_2                                 : max = 24.829245        threshold = 16.627592        scale = 7.637907
 linear_1                                 : max = 10.746186        threshold = 5.255032         scale = 24.167313
 linear_3                                 : max = 1.000000         threshold = 0.999756         scale = 127.031013
 ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\(^0^)/...233...
--- a/docs/source/model-export/code/test-streaming-ncnn-decode-conv-emformer-transducer-libri.txt
+++ b/docs/source/model-export/code/test-streaming-ncnn-decode-conv-emformer-transducer-libri.txt
@ -0,0 +1,7 @@
 2023-01-11 14:02:12,216 INFO [streaming-ncnn-decode.py:320] {'tokens': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav'}
 T 51 32
 2023-01-11 14:02:13,141 INFO [streaming-ncnn-decode.py:328] Constructing Fbank computer
 2023-01-11 14:02:13,151 INFO [streaming-ncnn-decode.py:331] Reading sound files: ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
 2023-01-11 14:02:13,176 INFO [streaming-ncnn-decode.py:336] torch.Size([106000])
 2023-01-11 14:02:17,581 INFO [streaming-ncnn-decode.py:380] ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
 2023-01-11 14:02:17,581 INFO [streaming-ncnn-decode.py:381] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
--- a/docs/source/model-export/code/test-streaming-ncnn-decode-lstm-transducer-libri.txt
+++ b/docs/source/model-export/code/test-streaming-ncnn-decode-lstm-transducer-libri.txt
@ -0,0 +1,6 @@
 2023-02-17 11:37:30,861 INFO [streaming-ncnn-decode.py:255] {'tokens': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav'}
 2023-02-17 11:37:31,425 INFO [streaming-ncnn-decode.py:263] Constructing Fbank computer
 2023-02-17 11:37:31,427 INFO [streaming-ncnn-decode.py:266] Reading sound files: ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
 2023-02-17 11:37:31,431 INFO [streaming-ncnn-decode.py:271] torch.Size([106000])
 2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:342] ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
 2023-02-17 11:37:34,115 INFO [streaming-ncnn-decode.py:343] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
--- a/docs/source/model-export/code/test-streaming-ncnn-decode-zipformer-transducer-libri.txt
+++ b/docs/source/model-export/code/test-streaming-ncnn-decode-zipformer-transducer-libri.txt
@ -0,0 +1,7 @@
 2023-02-27 20:43:40,283 INFO [streaming-ncnn-decode.py:349] {'tokens': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt', 'encoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param', 'encoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin', 'decoder_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param', 'decoder_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin', 'joiner_param_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param', 'joiner_bin_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin', 'sound_filename': './icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav'}
 2023-02-27 20:43:41,260 INFO [streaming-ncnn-decode.py:357] Constructing Fbank computer
 2023-02-27 20:43:41,264 INFO [streaming-ncnn-decode.py:360] Reading sound files: ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
 2023-02-27 20:43:41,269 INFO [streaming-ncnn-decode.py:365] torch.Size([106000])
 2023-02-27 20:43:41,280 INFO [streaming-ncnn-decode.py:372] number of states: 35
 2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:410] ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
 2023-02-27 20:43:45,026 INFO [streaming-ncnn-decode.py:411] AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD LIGHT UP HERE AND THERE THE SQUALID QUARTER OF THE BROTHELS
--- a/docs/source/model-export/export-ncnn-conv-emformer.rst
+++ b/docs/source/model-export/export-ncnn-conv-emformer.rst
@ -0,0 +1,753 @@
 .. _export_conv_emformer_transducer_models_to_ncnn:
 Export ConvEmformer transducer models to ncnn
 =============================================
 We use the pre-trained model from the following repository as an example:
  - `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_
 We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
 .. hint::
  We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
 .. caution::
  Please use a more recent version of PyTorch. For instance, ``torch 1.8``
  may ``not`` work.
 1. Download the pre-trained model
 ---------------------------------
 .. hint::
  You can also refer to `<https://k2-fsa.github.io/sherpa/cpp/pretrained_models/online_transducer.html#icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_ to download the pre-trained model.
  You have to install `git-lfs`_ before you continue.
 .. code-block:: bash
  cd egs/librispeech/ASR
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
  git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  cd ..
 .. note::
  We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
 In the above code, we downloaded the pre-trained model into the directory
 ``egs/librispeech/ASR/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05``.
 .. _export_for_ncnn_install_ncnn_and_pnnx:
 2. Install ncnn and pnnx
 ------------------------
 .. code-block:: bash
  # We put ncnn into $HOME/open-source/ncnn
  # You can change it to anywhere you like
  cd $HOME
  mkdir -p open-source
  cd open-source
  git clone https://github.com/csukuangfj/ncnn
  cd ncnn
  git submodule update --recursive --init
  # Note: We don't use "python setup.py install" or "pip install ." here
  mkdir -p build-wheel
  cd build-wheel
  cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DNCNN_PYTHON=ON \
    -DNCNN_BUILD_BENCHMARK=OFF \
    -DNCNN_BUILD_EXAMPLES=OFF \
    -DNCNN_BUILD_TOOLS=ON \
  ..
  make -j4
  cd ..
  # Note: $PWD here is $HOME/open-source/ncnn
  export PYTHONPATH=$PWD/python:$PYTHONPATH
  export PATH=$PWD/tools/pnnx/build/src:$PATH
  export PATH=$PWD/build-wheel/tools/quantize:$PATH
  # Now build pnnx
  cd tools/pnnx
  mkdir build
  cd build
  cmake ..
  make -j4
  ./src/pnnx
 Congratulations! You have successfully installed the following components:
  - ``pnnx``, which is an executable located in
    ``$HOME/open-source/ncnn/tools/pnnx/build/src``. We will use
    it to convert models exported by ``torch.jit.trace()``.
  - ``ncnn2int8``, which is an executable located in
    ``$HOME/open-source/ncnn/build-wheel/tools/quantize``. We will use
    it to quantize our models to ``int8``.
  - ``ncnn.cpython-38-x86_64-linux-gnu.so``, which is a Python module located
    in ``$HOME/open-source/ncnn/python/ncnn``.
    .. note::
      I am using ``Python 3.8``, so it
      is ``ncnn.cpython-38-x86_64-linux-gnu.so``. If you use a different
      version, say, ``Python 3.9``, the name would be
      ``ncnn.cpython-39-x86_64-linux-gnu.so``.
      Also, if you are not using Linux, the file name would also be different.
      But that does not matter. As long as you can compile it, it should work.
 We have set up ``PYTHONPATH`` so that you can use ``import ncnn`` in your
 Python code. We have also set up ``PATH`` so that you can use
 ``pnnx`` and ``ncnn2int8`` later in your terminal.
 .. caution::
  Please don't use `<https://github.com/tencent/ncnn>`_.
  We have made some modifications to the official `ncnn`_.
  We will synchronize `<https://github.com/csukuangfj/ncnn>`_ periodically
  with the official one.
 3. Export the model via torch.jit.trace()
 -----------------------------------------
 First, let us rename our pre-trained model:
 .. code-block::
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp
  ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-30.pt
  cd ../..
 Next, we use the following code to export our model:
 .. code-block:: bash
  dir=./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/
  ./conv_emformer_transducer_stateless2/export-for-ncnn.py \
    --exp-dir $dir/exp \
    --bpe-model $dir/data/lang_bpe_500/bpe.model \
    --epoch 30 \
    --avg 1 \
    --use-averaged-model 0 \
    \
    --num-encoder-layers 12 \
    --chunk-length 32 \
    --cnn-module-kernel 31 \
    --left-context-length 32 \
    --right-context-length 8 \
    --memory-size 32 \
    --encoder-dim 512
 .. caution::
   If your model has different configuration parameters, please change them accordingly.
 .. hint::
  We have renamed our model to ``epoch-30.pt`` so that we can use ``--epoch 30``.
  There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
  If you have trained a model by yourself and if you have all checkpoints
  available, please first use ``decode.py`` to tune ``--epoch --avg``
  and select the best combination with ``--use-averaged-model 1``.
 .. note::
  You will see the following log output:
  .. literalinclude:: ./code/export-conv-emformer-transducer-for-ncnn-output.txt
  The log shows the model has ``75490012`` parameters, i.e., ``~75 M``.
  .. code-block::
    ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
    -rw-r--r-- 1 kuangfangjun root 289M Jan 11 12:05 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/pretrained-epoch-30-avg-10-averaged.pt
  You can see that the file size of the pre-trained model is ``289 MB``, which
  is roughly equal to ``75490012*4/1024/1024 = 287.97 MB``.
 After running ``conv_emformer_transducer_stateless2/export-for-ncnn.py``,
 we will get the following files:
 .. code-block:: bash
  ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*pnnx*
  -rw-r--r-- 1 kuangfangjun root 1010K Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.pt
  -rw-r--r-- 1 kuangfangjun root  283M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.pt
  -rw-r--r-- 1 kuangfangjun root  3.0M Jan 11 12:15 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.pt
 .. _conv-emformer-step-4-export-torchscript-model-via-pnnx:
 4. Export torchscript model via pnnx
 ------------------------------------
 .. hint::
  Make sure you have set up the ``PATH`` environment variable. Otherwise,
  it will throw an error saying that ``pnnx`` could not be found.
 Now, it's time to export our models to `ncnn`_ via ``pnnx``.
 .. code-block::
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
  pnnx ./encoder_jit_trace-pnnx.pt
  pnnx ./decoder_jit_trace-pnnx.pt
  pnnx ./joiner_jit_trace-pnnx.pt
 It will generate the following files:
 .. code-block:: bash
  ls -lh  icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*ncnn*{bin,param}
  -rw-r--r-- 1 kuangfangjun root 503K Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  437 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 142M Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  79K Jan 11 12:36 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 1.5M Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  488 Jan 11 12:38 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
 There are two types of files:
 - ``param``: It is a text file containing the model architectures. You can
  use a text editor to view its content.
 - ``bin``: It is a binary file containing the model parameters.
 We compare the file sizes of the models below before and after converting via ``pnnx``:
 .. see https://tableconvert.com/restructuredtext-generator
 +----------------------------------+------------+
 | File name                        | File size  |
 +==================================+============+
 | encoder_jit_trace-pnnx.pt        | 283 MB     |
 +----------------------------------+------------+
 | decoder_jit_trace-pnnx.pt        | 1010 KB    |
 +----------------------------------+------------+
 | joiner_jit_trace-pnnx.pt         | 3.0 MB     |
 +----------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin  | 142 MB     |
 +----------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin  | 503 KB     |
 +----------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin   | 1.5 MB     |
 +----------------------------------+------------+
 You can see that the file sizes of the models after conversion are about one half
 of the models before conversion:
  - encoder: 283 MB vs 142 MB
  - decoder: 1010 KB vs 503 KB
  - joiner: 3.0 MB vs 1.5 MB
 The reason is that by default ``pnnx`` converts ``float32`` parameters
 to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
 for ``float16``. Thus, the files are about ``half the size`` after conversion.
 .. hint::
  If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
  won't convert ``float32`` to ``float16``.
 5. Test the exported models in icefall
 --------------------------------------
 .. note::
  We assume you have set up the environment variable ``PYTHONPATH`` when
  building `ncnn`_.
 Now we have successfully converted our pre-trained model to `ncnn`_ format.
 The generated 6 files are what we need. You can use the following code to
 test the converted models:
 .. code-block:: bash
  ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
    --tokens ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/data/lang_bpe_500/tokens.txt \
    --encoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param \
    --encoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin \
    --decoder-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param \
    --decoder-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin \
    --joiner-param-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param \
    --joiner-bin-filename ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin \
    ./icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/test_wavs/1089-134686-0001.wav
 .. hint::
  `ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
  only 1 wave file as input.
 The output is given below:
 .. literalinclude:: ./code/test-streaming-ncnn-decode-conv-emformer-transducer-libri.txt
 Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
 .. _conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn:
 6. Modify the exported encoder for sherpa-ncnn
 ----------------------------------------------
 In order to use the exported models in `sherpa-ncnn`_, we have to modify
 ``encoder_jit_trace-pnnx.ncnn.param``.
 Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
 .. code-block::
  7767517
  1060 1342
  Input                    in0                      0 1 in0
 **Explanation** of the above three lines:
  1. ``7767517``, it is a magic number and should not be changed.
  2. ``1060 1342``, the first number ``1060`` specifies the number of layers
     in this file, while ``1342`` specifies the number of intermediate outputs
     of this file
  3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
     is the layer name of this layer; ``0`` means this layer has no input;
     ``1`` means this layer has one output; ``in0`` is the output name of
     this layer.
 We need to add 1 extra line and also increment the number of layers.
 The result looks like below:
 .. code-block:: bash
  7767517
  1061 1342
  SherpaMetaData           sherpa_meta_data1        0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
  Input                    in0                      0 1 in0
 **Explanation**
  1. ``7767517``, it is still the same
  2. ``1061 1342``, we have added an extra layer, so we need to update ``1060`` to ``1061``.
     We don't need to change ``1342`` since the newly added layer has no inputs or outputs.
  3. ``SherpaMetaData  sherpa_meta_data1  0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512``
     This line is newly added. Its explanation is given below:
      - ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
      - ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
      - ``0 0`` means this layer has no inputs or output. Must be ``0 0``
      - ``0=1``, 0 is the key and 1 is the value. MUST be ``0=1``
      - ``1=12``, 1 is the key and 12 is the value of the
        parameter ``--num-encoder-layers`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      - ``2=32``, 2 is the key and 32 is the value of the
        parameter ``--memory-size`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      - ``3=31``, 3 is the key and 31 is the value of the
        parameter ``--cnn-module-kernel`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      - ``4=8``, 4 is the key and 8 is the value of the
        parameter ``--left-context-length`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      - ``5=32``, 5 is the key and 32 is the value of the
        parameter ``--chunk-length`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      - ``6=8``, 6 is the key and 8 is the value of the
        parameter ``--right-context-length`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      - ``7=512``, 7 is the key and 512 is the value of the
        parameter ``--encoder-dim`` that you provided when running
        ``conv_emformer_transducer_stateless2/export-for-ncnn.py``.
      For ease of reference, we list the key-value pairs that you need to add
      in the following table. If your model has a different setting, please
      change the values for ``SherpaMetaData`` accordingly. Otherwise, you
      will be ``SAD``.
          +------+-----------------------------+
          | key  | value                       |
          +======+=============================+
          | 0    | 1 (fixed)                   |
          +------+-----------------------------+
          | 1    | ``--num-encoder-layers``    |
          +------+-----------------------------+
          | 2    | ``--memory-size``           |
          +------+-----------------------------+
          | 3    | ``--cnn-module-kernel``     |
          +------+-----------------------------+
          | 4    | ``--left-context-length``   |
          +------+-----------------------------+
          | 5    | ``--chunk-length``          |
          +------+-----------------------------+
          | 6    | ``--right-context-length``  |
          +------+-----------------------------+
          | 7    | ``--encoder-dim``           |
          +------+-----------------------------+
  4. ``Input in0 0 1 in0``. No need to change it.
 .. caution::
  When you add a new layer ``SherpaMetaData``, please remember to update the
  number of layers. In our case, update  ``1060`` to ``1061``. Otherwise,
  you will be SAD later.
 .. hint::
  After adding the new layer ``SherpaMetaData``, you cannot use this model
  with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
  supported only in `sherpa-ncnn`_.
 .. hint::
  `ncnn`_ is very flexible. You can add new layers to it just by text-editing
  the ``param`` file! You don't need to change the ``bin`` file.
 Now you can use this model in `sherpa-ncnn`_.
 Please refer to the following documentation:
  - Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
  - ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
  - ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
  - Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
 We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
  - `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
    You can find more usages there.
 7. (Optional) int8 quantization with sherpa-ncnn
 ------------------------------------------------
 This step is optional.
 In this step, we describe how to quantize our model with ``int8``.
 Change :ref:`conv-emformer-step-4-export-torchscript-model-via-pnnx` to
 disable ``fp16`` when using ``pnnx``:
 .. code-block::
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
  pnnx ./encoder_jit_trace-pnnx.pt fp16=0
  pnnx ./decoder_jit_trace-pnnx.pt
  pnnx ./joiner_jit_trace-pnnx.pt fp16=0
 .. note::
  We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
  support quantizing the decoder model yet. We will update this documentation
  once `ncnn`_ supports it (hopefully later this year, 2023).
 It will generate the following files:
 .. code-block:: bash
  ls -lh icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/*_jit_trace-pnnx.ncnn.{param,bin}
  -rw-r--r-- 1 kuangfangjun root 503K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  437 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/decoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 283M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  79K Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/encoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 3.0M Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  488 Jan 11 15:56 icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/joiner_jit_trace-pnnx.ncnn.param
 Let us compare again the file sizes:
 +----------------------------------------+------------+
 | File name                              | File size  |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.pt              | 283 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.pt              | 1010 KB    |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.pt               | 3.0 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp16) | 1.5 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp32) | 3.0 MB     |
 +----------------------------------------+------------+
 You can see that the file sizes are doubled when we disable ``fp16``.
 .. note::
  You can again use ``streaming-ncnn-decode.py`` to test the exported models.
 Next, follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
 to modify ``encoder_jit_trace-pnnx.ncnn.param``.
 Change
 .. code-block:: bash
  7767517
  1060 1342
  Input                    in0                      0 1 in0
 to
 .. code-block:: bash
  7767517
  1061 1342
  SherpaMetaData           sherpa_meta_data1        0 0 0=1 1=12 2=32 3=31 4=8 5=32 6=8 7=512
  Input                    in0                      0 1 in0
 .. caution::
  Please follow :ref:`conv-emformer-modify-the-exported-encoder-for-sherpa-ncnn`
  to change the values for ``SherpaMetaData`` if your model uses a different setting.
 Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
 `sherpa-ncnn`_.
 .. code-block:: bash
  # We will download sherpa-ncnn to $HOME/open-source/
  # You can change it to anywhere you like.
  cd $HOME
  mkdir -p open-source
  cd open-source
  git clone https://github.com/k2-fsa/sherpa-ncnn
  cd sherpa-ncnn
  mkdir build
  cd build
  cmake ..
  make -j 4
  ./bin/generate-int8-scale-table
  export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
 The output of the above commands is:
 .. code-block:: bash
  (py38) kuangfangjun:build$ generate-int8-scale-table
  Please provide 10 arg. Currently given: 1
  Usage:
  generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
  Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
 We need to create a file ``wave_filenames.txt``, in which we need to put
 some calibration wave files. For testing purposes, we list in it the ``test_wavs``
 from the pre-trained model repository `<https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05>`_:
 .. code-block:: bash
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
  cat <<EOF > wave_filenames.txt
  ../test_wavs/1089-134686-0001.wav
  ../test_wavs/1221-135766-0001.wav
  ../test_wavs/1221-135766-0002.wav
  EOF
 Now we can calculate the scales needed for quantization with the calibration data:
 .. code-block:: bash
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
  generate-int8-scale-table \
    ./encoder_jit_trace-pnnx.ncnn.param \
    ./encoder_jit_trace-pnnx.ncnn.bin \
    ./decoder_jit_trace-pnnx.ncnn.param \
    ./decoder_jit_trace-pnnx.ncnn.bin \
    ./joiner_jit_trace-pnnx.ncnn.param \
    ./joiner_jit_trace-pnnx.ncnn.bin \
    ./encoder-scale-table.txt \
    ./joiner-scale-table.txt \
    ./wave_filenames.txt
 The output logs are given below:
 .. literalinclude:: ./code/generate-int-8-scale-table-for-conv-emformer.txt
 It generates the following two files:
 .. code-block:: bash
  $ ls -lh encoder-scale-table.txt joiner-scale-table.txt
  -rw-r--r-- 1 kuangfangjun root 955K Jan 11 17:28 encoder-scale-table.txt
  -rw-r--r-- 1 kuangfangjun root  18K Jan 11 17:28 joiner-scale-table.txt
 .. caution::
  In practice, you definitely need more calibration data to compute the scale table;
  the three wave files used above are only for demonstration.
 Finally, let us use the scale table to quantize our models into ``int8``.
 .. code-block:: bash
  ncnn2int8
  usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
 First, we quantize the encoder model:
 .. code-block:: bash
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
  ncnn2int8 \
    ./encoder_jit_trace-pnnx.ncnn.param \
    ./encoder_jit_trace-pnnx.ncnn.bin \
    ./encoder_jit_trace-pnnx.ncnn.int8.param \
    ./encoder_jit_trace-pnnx.ncnn.int8.bin \
    ./encoder-scale-table.txt
 Next, we quantize the joiner model:
 .. code-block:: bash
  ncnn2int8 \
    ./joiner_jit_trace-pnnx.ncnn.param \
    ./joiner_jit_trace-pnnx.ncnn.bin \
    ./joiner_jit_trace-pnnx.ncnn.int8.param \
    ./joiner_jit_trace-pnnx.ncnn.int8.bin \
    ./joiner-scale-table.txt
 The above two commands generate the following 4 files:
 .. code-block:: bash
  -rw-r--r-- 1 kuangfangjun root  99M Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.bin
  -rw-r--r-- 1 kuangfangjun root  78K Jan 11 17:34 encoder_jit_trace-pnnx.ncnn.int8.param
  -rw-r--r-- 1 kuangfangjun root 774K Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.bin
  -rw-r--r-- 1 kuangfangjun root  496 Jan 11 17:35 joiner_jit_trace-pnnx.ncnn.int8.param
 Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
 .. caution::
  ``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
  You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
  and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
  For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
  replace the following invocation:
    .. code-block:: bash
      cd egs/librispeech/ASR
      cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
      sherpa-ncnn \
        ../data/lang_bpe_500/tokens.txt \
        ./encoder_jit_trace-pnnx.ncnn.param \
        ./encoder_jit_trace-pnnx.ncnn.bin \
        ./decoder_jit_trace-pnnx.ncnn.param \
        ./decoder_jit_trace-pnnx.ncnn.bin \
        ./joiner_jit_trace-pnnx.ncnn.param \
        ./joiner_jit_trace-pnnx.ncnn.bin \
        ../test_wavs/1089-134686-0001.wav
  with
    .. code-block::
      cd egs/librispeech/ASR
      cd icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05/exp/
      sherpa-ncnn \
        ../data/lang_bpe_500/tokens.txt \
        ./encoder_jit_trace-pnnx.ncnn.int8.param \
        ./encoder_jit_trace-pnnx.ncnn.int8.bin \
        ./decoder_jit_trace-pnnx.ncnn.param \
        ./decoder_jit_trace-pnnx.ncnn.bin \
        ./joiner_jit_trace-pnnx.ncnn.param \
        ./joiner_jit_trace-pnnx.ncnn.bin \
        ../test_wavs/1089-134686-0001.wav
 The following table compares again the file sizes:
 +----------------------------------------+------------+
 | File name                              | File size  |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.pt              | 283 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.pt              | 1010 KB    |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.pt               | 3.0 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp16) | 142 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp16) | 1.5 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp32) | 283 MB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp32) | 3.0 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.int8.bin   | 99 MB      |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.int8.bin    | 774 KB     |
 +----------------------------------------+------------+
 You can see that the file sizes of the model after ``int8`` quantization
 are much smaller.
 .. hint::
    Currently, only linear layers and convolutional layers are quantized
    with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
 .. note::
  You need to test the recognition accuracy after ``int8`` quantization.
 You can find the speed comparison at `<https://github.com/k2-fsa/sherpa-ncnn/issues/44>`_.
 That's it! Have fun with `sherpa-ncnn`_!
--- a/docs/source/model-export/export-ncnn-lstm.rst
+++ b/docs/source/model-export/export-ncnn-lstm.rst
@ -0,0 +1,644 @@
 .. _export_lstm_transducer_models_to_ncnn:
 Export LSTM transducer models to ncnn
 -------------------------------------
 We use the pre-trained model from the following repository as an example:
 `<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
 We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
 .. hint::
  We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
 .. caution::
  Please use a more recent version of PyTorch. For instance, ``torch 1.8``
  may ``not`` work.
 1. Download the pre-trained model
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. hint::
  You have to install `git-lfs`_ before you continue.
 .. code-block:: bash
  cd egs/librispeech/ASR
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
  git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  cd ..
 .. note::
  We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
 In the above code, we downloaded the pre-trained model into the directory
 ``egs/librispeech/ASR/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03``.
 2. Install ncnn and pnnx
 ^^^^^^^^^^^^^^^^^^^^^^^^
 Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
 3. Export the model via torch.jit.trace()
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 First, let us rename our pre-trained model:
 .. code-block::
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp
  ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
  cd ../..
 Next, we use the following code to export our model:
 .. code-block:: bash
  dir=./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
  ./lstm_transducer_stateless2/export-for-ncnn.py \
    --exp-dir $dir/exp \
    --bpe-model $dir/data/lang_bpe_500/bpe.model \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0 \
    --num-encoder-layers 12 \
    --encoder-dim 512 \
    --rnn-hidden-size 1024
 .. hint::
  We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
  There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
  If you have trained a model by yourself and if you have all checkpoints
  available, please first use ``decode.py`` to tune ``--epoch --avg``
  and select the best combination with ``--use-averaged-model 1``.
 .. note::
  You will see the following log output:
  .. literalinclude:: ./code/export-lstm-transducer-for-ncnn-output.txt
  The log shows the model has ``84176356`` parameters, i.e., ``~84 M``.
  .. code-block::
    ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
    -rw-r--r-- 1 kuangfangjun root 324M Feb 17 10:34 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/pretrained-iter-468000-avg-16.pt
  You can see that the file size of the pre-trained model is ``324 MB``, which
  is roughly equal to ``84176356*4/1024/1024 = 321.107 MB``.
 After running ``lstm_transducer_stateless2/export-for-ncnn.py``,
 we will get the following files:
 .. code-block:: bash
  ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*pnnx.pt
  -rw-r--r-- 1 kuangfangjun root 1010K Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.pt
  -rw-r--r-- 1 kuangfangjun root  318M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.pt
  -rw-r--r-- 1 kuangfangjun root  3.0M Feb 17 11:22 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.pt
 .. _lstm-transducer-step-4-export-torchscript-model-via-pnnx:
 4. Export torchscript model via pnnx
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. hint::
  Make sure you have set up the ``PATH`` environment variable
  in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
  it will throw an error saying that ``pnnx`` could not be found.
 Now, it's time to export our models to `ncnn`_ via ``pnnx``.
 .. code-block::
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
  pnnx ./encoder_jit_trace-pnnx.pt
  pnnx ./decoder_jit_trace-pnnx.pt
  pnnx ./joiner_jit_trace-pnnx.pt
 It will generate the following files:
 .. code-block:: bash
  ls -lh  icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*ncnn*{bin,param}
  -rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 159M Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  21K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 1.5M Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  488 Feb 17 11:33 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
 There are two types of files:
 - ``param``: It is a text file containing the model architectures. You can
  use a text editor to view its content.
 - ``bin``: It is a binary file containing the model parameters.
 We compare the file sizes of the models below before and after converting via ``pnnx``:
 .. see https://tableconvert.com/restructuredtext-generator
 +----------------------------------+------------+
 | File name                        | File size  |
 +==================================+============+
 | encoder_jit_trace-pnnx.pt        | 318 MB     |
 +----------------------------------+------------+
 | decoder_jit_trace-pnnx.pt        | 1010 KB    |
 +----------------------------------+------------+
 | joiner_jit_trace-pnnx.pt         | 3.0 MB     |
 +----------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin  | 159 MB     |
 +----------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin  | 503 KB     |
 +----------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin   | 1.5 MB     |
 +----------------------------------+------------+
 You can see that the file sizes of the models after conversion are about one half
 of the models before conversion:
  - encoder: 318 MB vs 159 MB
  - decoder: 1010 KB vs 503 KB
  - joiner: 3.0 MB vs 1.5 MB
 The reason is that by default ``pnnx`` converts ``float32`` parameters
 to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
 for ``float16``. Thus, the file size is roughly ``halved`` after conversion.
 .. hint::
  If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
  won't convert ``float32`` to ``float16``.
 5. Test the exported models in icefall
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. note::
  We assume you have set up the environment variable ``PYTHONPATH`` when
  building `ncnn`_.
 Now we have successfully converted our pre-trained model to `ncnn`_ format.
 The generated 6 files are what we need. You can use the following code to
 test the converted models:
 .. code-block:: bash
  python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
    --tokens ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/data/lang_bpe_500/tokens.txt \
    --encoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param \
    --encoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin \
    --decoder-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param \
    --decoder-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin \
    --joiner-param-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param \
    --joiner-bin-filename ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin \
    ./icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/test_wavs/1089-134686-0001.wav
 .. hint::
  `ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
  only 1 wave file as input.
 The output is given below:
 .. literalinclude:: ./code/test-streaming-ncnn-decode-lstm-transducer-libri.txt
 Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
 .. _lstm-modify-the-exported-encoder-for-sherpa-ncnn:
 6. Modify the exported encoder for sherpa-ncnn
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 In order to use the exported models in `sherpa-ncnn`_, we have to modify
 ``encoder_jit_trace-pnnx.ncnn.param``.
 Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
 .. code-block::
  7767517
  267 379
  Input                    in0                      0 1 in0
 **Explanation** of the above three lines:
  1. ``7767517``, it is a magic number and should not be changed.
  2. ``267 379``, the first number ``267`` specifies the number of layers
     in this file, while ``379`` specifies the number of intermediate outputs
     of this file
  3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
     is the layer name of this layer; ``0`` means this layer has no input;
     ``1`` means this layer has one output; ``in0`` is the output name of
     this layer.
 We need to add 1 extra line and also increment the number of layers.
 The result looks like below:
 .. code-block:: bash
  7767517
  268 379
  SherpaMetaData           sherpa_meta_data1        0 0 0=3 1=12 2=512 3=1024
  Input                    in0                      0 1 in0
 **Explanation**
  1. ``7767517``, it is still the same
  2. ``268 379``, we have added an extra layer, so we need to update ``267`` to ``268``.
     We don't need to change ``379`` since the newly added layer has no inputs or outputs.
  3. ``SherpaMetaData  sherpa_meta_data1  0 0 0=3 1=12 2=512 3=1024``
     This line is newly added. Its explanation is given below:
      - ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
      - ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
      - ``0 0`` means this layer has no inputs or output. Must be ``0 0``
      - ``0=3``, 0 is the key and 3 is the value. MUST be ``0=3``
      - ``1=12``, 1 is the key and 12 is the value of the
        parameter ``--num-encoder-layers`` that you provided when running
        ``./lstm_transducer_stateless2/export-for-ncnn.py``.
      - ``2=512``, 2 is the key and 512 is the value of the
        parameter ``--encoder-dim`` that you provided when running
        ``./lstm_transducer_stateless2/export-for-ncnn.py``.
      - ``3=1024``, 3 is the key and 1024 is the value of the
        parameter ``--rnn-hidden-size`` that you provided when running
        ``./lstm_transducer_stateless2/export-for-ncnn.py``.
      For ease of reference, we list the key-value pairs that you need to add
      in the following table. If your model has a different setting, please
      change the values for ``SherpaMetaData`` accordingly. Otherwise, you
      will be ``SAD``.
          +------+-----------------------------+
          | key  | value                       |
          +======+=============================+
          | 0    | 3 (fixed)                   |
          +------+-----------------------------+
          | 1    | ``--num-encoder-layers``    |
          +------+-----------------------------+
          | 2    | ``--encoder-dim``           |
          +------+-----------------------------+
          | 3    | ``--rnn-hidden-size``       |
          +------+-----------------------------+
  4. ``Input in0 0 1 in0``. No need to change it.
 .. caution::
  When you add a new layer ``SherpaMetaData``, please remember to update the
  number of layers. In our case, update  ``267`` to ``268``. Otherwise,
  you will be SAD later.
 .. hint::
  After adding the new layer ``SherpaMetaData``, you cannot use this model
  with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
  supported only in `sherpa-ncnn`_.
 .. hint::
  `ncnn`_ is very flexible. You can add new layers to it just by text-editing
  the ``param`` file! You don't need to change the ``bin`` file.
 Now you can use this model in `sherpa-ncnn`_.
 Please refer to the following documentation:
  - Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
  - ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
  - ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
  - Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
 We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
  - `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
    You can find more usages there.
 7. (Optional) int8 quantization with sherpa-ncnn
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 This step is optional.
 In this step, we describe how to quantize our model with ``int8``.
 Change :ref:`lstm-transducer-step-4-export-torchscript-model-via-pnnx` to
 disable ``fp16`` when using ``pnnx``:
 .. code-block::
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
  pnnx ./encoder_jit_trace-pnnx.pt fp16=0
  pnnx ./decoder_jit_trace-pnnx.pt
  pnnx ./joiner_jit_trace-pnnx.pt fp16=0
 .. note::
  We add ``fp16=0`` when exporting the encoder and joiner. `ncnn`_ does not
  support quantizing the decoder model yet. We will update this documentation
  once `ncnn`_ supports it (hopefully later this year, 2023).
 .. code-block:: bash
  ls -lh icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/*_jit_trace-pnnx.ncnn.{param,bin}
  -rw-r--r-- 1 kuangfangjun root 503K Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  437 Feb 17 11:32 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/decoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 317M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  21K Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/encoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 3.0M Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  488 Feb 17 11:54 icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/joiner_jit_trace-pnnx.ncnn.param
 Let us compare again the file sizes:
 +----------------------------------------+------------+
 | File name                              | File size  |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.pt              | 318 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.pt              | 1010 KB    |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.pt               | 3.0 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp16) | 1.5 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp32) | 3.0 MB     |
 +----------------------------------------+------------+
 You can see that the file sizes are doubled when we disable ``fp16``.
 .. note::
  You can again use ``streaming-ncnn-decode.py`` to test the exported models.
 Next, follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
 to modify ``encoder_jit_trace-pnnx.ncnn.param``.
 Change
 .. code-block:: bash
  7767517
  267 379
  Input                    in0                      0 1 in0
 to
 .. code-block:: bash
  7767517
  268 379
  SherpaMetaData           sherpa_meta_data1        0 0 0=3 1=12 2=512 3=1024
  Input                    in0                      0 1 in0
 .. caution::
  Please follow :ref:`lstm-modify-the-exported-encoder-for-sherpa-ncnn`
  to change the values for ``SherpaMetaData`` if your model uses a different setting.
 Next, let us compile `sherpa-ncnn`_ since we will quantize our models within
 `sherpa-ncnn`_.
 .. code-block:: bash
  # We will download sherpa-ncnn to $HOME/open-source/
  # You can change it to anywhere you like.
  cd $HOME
  mkdir -p open-source
  cd open-source
  git clone https://github.com/k2-fsa/sherpa-ncnn
  cd sherpa-ncnn
  mkdir build
  cd build
  cmake ..
  make -j 4
  ./bin/generate-int8-scale-table
  export PATH=$HOME/open-source/sherpa-ncnn/build/bin:$PATH
 The output of the above commands is:
 .. code-block:: bash
  (py38) kuangfangjun:build$ generate-int8-scale-table
  Please provide 10 arg. Currently given: 1
  Usage:
  generate-int8-scale-table encoder.param encoder.bin decoder.param decoder.bin joiner.param joiner.bin encoder-scale-table.txt joiner-scale-table.txt wave_filenames.txt
  Each line in wave_filenames.txt is a path to some 16k Hz mono wave file.
 We need to create a file ``wave_filenames.txt``, in which we need to put
 some calibration wave files. For testing purposes, we use the ``test_wavs``
 from the pre-trained model repository
 `<https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03>`_
 .. code-block:: bash
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
  cat <<EOF > wave_filenames.txt
  ../test_wavs/1089-134686-0001.wav
  ../test_wavs/1221-135766-0001.wav
  ../test_wavs/1221-135766-0002.wav
  EOF
 Now we can calculate the scales needed for quantization with the calibration data:
 .. code-block:: bash
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
  generate-int8-scale-table \
    ./encoder_jit_trace-pnnx.ncnn.param \
    ./encoder_jit_trace-pnnx.ncnn.bin \
    ./decoder_jit_trace-pnnx.ncnn.param \
    ./decoder_jit_trace-pnnx.ncnn.bin \
    ./joiner_jit_trace-pnnx.ncnn.param \
    ./joiner_jit_trace-pnnx.ncnn.bin \
    ./encoder-scale-table.txt \
    ./joiner-scale-table.txt \
    ./wave_filenames.txt
 The output logs are given below:
 .. literalinclude:: ./code/generate-int-8-scale-table-for-lstm.txt
 It generates the following two files:
 .. code-block:: bash
  ls -lh encoder-scale-table.txt joiner-scale-table.txt
  -rw-r--r-- 1 kuangfangjun root 345K Feb 17 12:13 encoder-scale-table.txt
  -rw-r--r-- 1 kuangfangjun root  17K Feb 17 12:13 joiner-scale-table.txt
 .. caution::
  Definitely, you need more calibration data to compute the scale table.
 Finally, let us use the scale table to quantize our models into ``int8``.
 .. code-block:: bash
  ncnn2int8
  usage: ncnn2int8 [inparam] [inbin] [outparam] [outbin] [calibration table]
 First, we quantize the encoder model:
 .. code-block:: bash
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
  ncnn2int8 \
    ./encoder_jit_trace-pnnx.ncnn.param \
    ./encoder_jit_trace-pnnx.ncnn.bin \
    ./encoder_jit_trace-pnnx.ncnn.int8.param \
    ./encoder_jit_trace-pnnx.ncnn.int8.bin \
    ./encoder-scale-table.txt
 Next, we quantize the joiner model:
 .. code-block:: bash
  ncnn2int8 \
    ./joiner_jit_trace-pnnx.ncnn.param \
    ./joiner_jit_trace-pnnx.ncnn.bin \
    ./joiner_jit_trace-pnnx.ncnn.int8.param \
    ./joiner_jit_trace-pnnx.ncnn.int8.bin \
    ./joiner-scale-table.txt
 The above two commands generate the following 4 files:
 .. code-block::
  -rw-r--r-- 1 kuangfangjun root 218M Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.bin
  -rw-r--r-- 1 kuangfangjun root  21K Feb 17 12:19 encoder_jit_trace-pnnx.ncnn.int8.param
  -rw-r--r-- 1 kuangfangjun root 774K Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.bin
  -rw-r--r-- 1 kuangfangjun root  496 Feb 17 12:19 joiner_jit_trace-pnnx.ncnn.int8.param
 Congratulations! You have successfully quantized your model from ``float32`` to ``int8``.
 .. caution::
  ``ncnn.int8.param`` and ``ncnn.int8.bin`` must be used in pairs.
  You can replace ``ncnn.param`` and ``ncnn.bin`` with ``ncnn.int8.param``
  and ``ncnn.int8.bin`` in `sherpa-ncnn`_ if you like.
  For instance, to use only the ``int8`` encoder in ``sherpa-ncnn``, you can
  replace the following invocation:
    .. code-block::
      cd egs/librispeech/ASR
      cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
      sherpa-ncnn \
        ../data/lang_bpe_500/tokens.txt \
        ./encoder_jit_trace-pnnx.ncnn.param \
        ./encoder_jit_trace-pnnx.ncnn.bin \
        ./decoder_jit_trace-pnnx.ncnn.param \
        ./decoder_jit_trace-pnnx.ncnn.bin \
        ./joiner_jit_trace-pnnx.ncnn.param \
        ./joiner_jit_trace-pnnx.ncnn.bin \
        ../test_wavs/1089-134686-0001.wav
  with
    .. code-block:: bash
      cd egs/librispeech/ASR
      cd icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03/exp/
      sherpa-ncnn \
        ../data/lang_bpe_500/tokens.txt \
        ./encoder_jit_trace-pnnx.ncnn.int8.param \
        ./encoder_jit_trace-pnnx.ncnn.int8.bin \
        ./decoder_jit_trace-pnnx.ncnn.param \
        ./decoder_jit_trace-pnnx.ncnn.bin \
        ./joiner_jit_trace-pnnx.ncnn.param \
        ./joiner_jit_trace-pnnx.ncnn.bin \
        ../test_wavs/1089-134686-0001.wav
 The following table compares again the file sizes:
 +----------------------------------------+------------+
 | File name                              | File size  |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.pt              | 318 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.pt              | 1010 KB    |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.pt               | 3.0 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp16) | 159 MB     |
 +----------------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin (fp16) | 503 KB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp16) | 1.5 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin (fp32) | 317 MB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin  (fp32) | 3.0 MB     |
 +----------------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.int8.bin   | 218 MB     |
 +----------------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.int8.bin    | 774 KB     |
 +----------------------------------------+------------+
 You can see that the file size of the joiner model after ``int8`` quantization
 is much smaller. However, the size of the encoder model is even larger than
 the ``fp16`` counterpart. The reason is that `ncnn`_ currently does not support
 quantizing ``LSTM`` layers into ``8-bit``. Please see
 `<https://github.com/Tencent/ncnn/issues/4532>`_
 .. hint::
    Currently, only linear layers and convolutional layers are quantized
    with ``int8``, so you don't see an exact ``4x`` reduction in file sizes.
 .. note::
  You need to test the recognition accuracy after ``int8`` quantization.
 That's it! Have fun with `sherpa-ncnn`_!
--- a/docs/source/model-export/export-ncnn-zipformer.rst
+++ b/docs/source/model-export/export-ncnn-zipformer.rst
@ -0,0 +1,383 @@
 .. _export_streaming_zipformer_transducer_models_to_ncnn:
 Export streaming Zipformer transducer models to ncnn
 ----------------------------------------------------
 We use the pre-trained model from the following repository as an example:
 `<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_
 We will show you step by step how to export it to `ncnn`_ and run it with `sherpa-ncnn`_.
 .. hint::
  We use ``Ubuntu 18.04``, ``torch 1.13``, and ``Python 3.8`` for testing.
 .. caution::
  Please use a more recent version of PyTorch. For instance, ``torch 1.8``
  may ``not`` work.
 1. Download the pre-trained model
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. hint::
  You have to install `git-lfs`_ before you continue.
 .. code-block:: bash
  cd egs/librispeech/ASR
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
  cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
  git lfs pull --include "exp/pretrained.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  cd ..
 .. note::
  We downloaded ``exp/pretrained-xxx.pt``, not ``exp/cpu-jit_xxx.pt``.
 In the above code, we downloaded the pre-trained model into the directory
 ``egs/librispeech/ASR/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29``.
 2. Install ncnn and pnnx
 ^^^^^^^^^^^^^^^^^^^^^^^^
 Please refer to :ref:`export_for_ncnn_install_ncnn_and_pnnx` .
 3. Export the model via torch.jit.trace()
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 First, let us rename our pre-trained model:
 .. code-block::
  cd egs/librispeech/ASR
  cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
  ln -s pretrained.pt epoch-99.pt
  cd ../..
 Next, we use the following code to export our model:
 .. code-block:: bash
  dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
  ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
    --bpe-model $dir/data/lang_bpe_500/bpe.model \
    --exp-dir $dir/exp \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    \
    --decode-chunk-len 32 \
    --num-left-chunks 4 \
    --num-encoder-layers "2,4,3,2,4" \
    --feedforward-dims "1024,1024,2048,2048,1024" \
    --nhead "8,8,8,8,8" \
    --encoder-dims "384,384,384,384,384" \
    --attention-dims "192,192,192,192,192" \
    --encoder-unmasked-dims "256,256,256,256,256" \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512
 .. caution::
   If your model has different configuration parameters, please change them accordingly.
 .. hint::
  We have renamed our model to ``epoch-99.pt`` so that we can use ``--epoch 99``.
  There is only one pre-trained model, so we use ``--avg 1 --use-averaged-model 0``.
  If you have trained a model by yourself and if you have all checkpoints
  available, please first use ``decode.py`` to tune ``--epoch --avg``
  and select the best combination with ``--use-averaged-model 1``.
 .. note::
  You will see the following log output:
  .. literalinclude:: ./code/export-zipformer-transducer-for-ncnn-output.txt
  The log shows the model has ``69920376`` parameters, i.e., ``~69.9 M``.
  .. code-block:: bash
   ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt
   -rw-r--r-- 1 kuangfangjun root 269M Jan 12 12:53 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/pretrained.pt
  You can see that the file size of the pre-trained model is ``269 MB``, which
  is roughly equal to ``69920376*4/1024/1024 = 266.725 MB``.
 After running ``pruned_transducer_stateless7_streaming/export-for-ncnn.py``,
 we will get the following files:
 .. code-block:: bash
  ls -lh icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*pnnx.pt
  -rw-r--r-- 1 kuangfangjun root 1022K Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.pt
  -rw-r--r-- 1 kuangfangjun root  266M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.pt
  -rw-r--r-- 1 kuangfangjun root  2.8M Feb 27 20:23 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.pt
 .. _zipformer-transducer-step-4-export-torchscript-model-via-pnnx:
 4. Export torchscript model via pnnx
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. hint::
  Make sure you have set up the ``PATH`` environment variable
  in :ref:`export_for_ncnn_install_ncnn_and_pnnx`. Otherwise,
  it will throw an error saying that ``pnnx`` could not be found.
 Now, it's time to export our models to `ncnn`_ via ``pnnx``.
 .. code-block::
  cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/
  pnnx ./encoder_jit_trace-pnnx.pt
  pnnx ./decoder_jit_trace-pnnx.pt
  pnnx ./joiner_jit_trace-pnnx.pt
 It will generate the following files:
 .. code-block:: bash
  ls -lh  icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/*ncnn*{bin,param}
  -rw-r--r-- 1 kuangfangjun root 509K Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  437 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 133M Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root 152K Feb 27 20:30 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param
  -rw-r--r-- 1 kuangfangjun root 1.4M Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin
  -rw-r--r-- 1 kuangfangjun root  488 Feb 27 20:31 icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param
 There are two types of files:
 - ``param``: It is a text file containing the model architectures. You can
  use a text editor to view its content.
 - ``bin``: It is a binary file containing the model parameters.
 We compare the file sizes of the models below before and after converting via ``pnnx``:
 .. see https://tableconvert.com/restructuredtext-generator
 +----------------------------------+------------+
 | File name                        | File size  |
 +==================================+============+
 | encoder_jit_trace-pnnx.pt        | 266 MB     |
 +----------------------------------+------------+
 | decoder_jit_trace-pnnx.pt        | 1022 KB    |
 +----------------------------------+------------+
 | joiner_jit_trace-pnnx.pt         | 2.8 MB     |
 +----------------------------------+------------+
 | encoder_jit_trace-pnnx.ncnn.bin  | 133 MB     |
 +----------------------------------+------------+
 | decoder_jit_trace-pnnx.ncnn.bin  | 509 KB     |
 +----------------------------------+------------+
 | joiner_jit_trace-pnnx.ncnn.bin   | 1.4 MB     |
 +----------------------------------+------------+
 You can see that the file sizes of the models after conversion are about one half
 of the models before conversion:
  - encoder: 266 MB vs 133 MB
  - decoder: 1022 KB vs 509 KB
  - joiner: 2.8 MB vs 1.4 MB
 The reason is that by default ``pnnx`` converts ``float32`` parameters
 to ``float16``. A ``float32`` parameter occupies 4 bytes, while it is 2 bytes
 for ``float16``. Thus, the file size is halved after conversion.
 .. hint::
  If you use ``pnnx ./encoder_jit_trace-pnnx.pt fp16=0``, then ``pnnx``
  won't convert ``float32`` to ``float16``.
 5. Test the exported models in icefall
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. note::
  We assume you have set up the environment variable ``PYTHONPATH`` when
  building `ncnn`_.
 Now we have successfully converted our pre-trained model to `ncnn`_ format.
 The generated 6 files are what we need. You can use the following code to
 test the converted models:
 .. code-block:: bash
  python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
    --tokens ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/tokens.txt \
    --encoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.param \
    --encoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/encoder_jit_trace-pnnx.ncnn.bin \
    --decoder-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.param \
    --decoder-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/decoder_jit_trace-pnnx.ncnn.bin \
    --joiner-param-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.param \
    --joiner-bin-filename ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/joiner_jit_trace-pnnx.ncnn.bin \
    ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/test_wavs/1089-134686-0001.wav
 .. hint::
  `ncnn`_ supports only ``batch size == 1``, so ``streaming-ncnn-decode.py`` accepts
  only 1 wave file as input.
 The output is given below:
 .. literalinclude:: ./code/test-streaming-ncnn-decode-zipformer-transducer-libri.txt
 Congratulations! You have successfully exported a model from PyTorch to `ncnn`_!
 .. _zipformer-modify-the-exported-encoder-for-sherpa-ncnn:
 6. Modify the exported encoder for sherpa-ncnn
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 In order to use the exported models in `sherpa-ncnn`_, we have to modify
 ``encoder_jit_trace-pnnx.ncnn.param``.
 Let us have a look at the first few lines of ``encoder_jit_trace-pnnx.ncnn.param``:
 .. code-block::
  7767517
  2028 2547
  Input                    in0                      0 1 in0
 **Explanation** of the above three lines:
  1. ``7767517``, it is a magic number and should not be changed.
  2. ``2028 2547``, the first number ``2028`` specifies the number of layers
     in this file, while ``2547`` specifies the number of intermediate outputs
     of this file.
  3. ``Input in0 0 1 in0``, ``Input`` is the layer type of this layer; ``in0``
     is the layer name of this layer; ``0`` means this layer has no input;
     ``1`` means this layer has one output; ``in0`` is the output name of
     this layer.
 We need to add 1 extra line and also increment the number of layers.
 The result looks like below:
 .. code-block:: bash
  7767517
  2029 2547
  SherpaMetaData           sherpa_meta_data1        0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31
  Input                    in0                      0 1 in0
 **Explanation**
  1. ``7767517``, it is still the same
  2. ``2029 2547``, we have added an extra layer, so we need to update ``2028`` to ``2029``.
     We don't need to change ``2547`` since the newly added layer has no inputs or outputs.
  3. ``SherpaMetaData  sherpa_meta_data1  0 0 0=2 1=32 2=4 3=7 -23316=5,2,4,3,2,4 -23317=5,384,384,384,384,384 -23318=5,192,192,192,192,192 -23319=5,1,2,4,8,2 -23320=5,31,31,31,31,31``
     This line is newly added. Its explanation is given below:
      - ``SherpaMetaData`` is the type of this layer. Must be ``SherpaMetaData``.
      - ``sherpa_meta_data1`` is the name of this layer. Must be ``sherpa_meta_data1``.
      - ``0 0`` means this layer has no inputs or outputs. Must be ``0 0``.
      - ``0=2``, 0 is the key and 2 is the value. MUST be ``0=2``
      - ``1=32``, 1 is the key and 32 is the value of the
        parameter ``--decode-chunk-len`` that you provided when running
        ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      - ``2=4``, 2 is the key and 4 is the value of the
        parameter ``--num-left-chunks`` that you provided when running
        ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      - ``3=7``, 3 is the key and 7 is the value for the amount of padding
        used in the Conv2DSubsampling layer. It should be 7 for zipformer
        if you don't change zipformer.py.
      - ``-23316=5,2,4,3,2,4``, attribute 16, this is an array attribute.
        It is attribute 16 since -23300 - (-23316) = 16.
        The first element of the array is the length of the array, which is 5 in our case.
        ``2,4,3,2,4`` is the value of ``--num-encoder-layers`` that you provided
        when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      - ``-23317=5,384,384,384,384,384``, attribute 17.
        The first element of the array is the length of the array, which is 5 in our case.
        ``384,384,384,384,384`` is the value of ``--encoder-dims`` that you provided
        when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      - ``-23318=5,192,192,192,192,192``, attribute 18.
        The first element of the array is the length of the array, which is 5 in our case.
        ``192,192,192,192,192`` is the value of ``--attention-dims`` that you provided
        when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      - ``-23319=5,1,2,4,8,2``, attribute 19.
        The first element of the array is the length of the array, which is 5 in our case.
        ``1,2,4,8,2`` is the value of ``--zipformer-downsampling-factors`` that you provided
        when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      - ``-23320=5,31,31,31,31,31``, attribute 20.
        The first element of the array is the length of the array, which is 5 in our case.
        ``31,31,31,31,31`` is the value of ``--cnn-module-kernels`` that you provided
        when running ``./pruned_transducer_stateless7_streaming/export-for-ncnn.py``.
      For ease of reference, we list the key-value pairs that you need to add
      in the following table. If your model has a different setting, please
      change the values for ``SherpaMetaData`` accordingly. Otherwise, you
      will be ``SAD``.
          +----------+--------------------------------------------+
          | key      | value                                      |
          +==========+============================================+
          | 0        | 2 (fixed)                                  |
          +----------+--------------------------------------------+
          | 1        | ``--decode-chunk-len``                     |
          +----------+--------------------------------------------+
          | 2        | ``--num-left-chunks``                      |
          +----------+--------------------------------------------+
          | 3        | 7 (if you don't change code)               |
          +----------+--------------------------------------------+
          |-23316    | ``--num-encoder-layers``                   |
          +----------+--------------------------------------------+
          |-23317    | ``--encoder-dims``                         |
          +----------+--------------------------------------------+
          |-23318    | ``--attention-dims``                       |
          +----------+--------------------------------------------+
          |-23319    | ``--zipformer-downsampling-factors``       |
          +----------+--------------------------------------------+
          |-23320    | ``--cnn-module-kernels``                   |
          +----------+--------------------------------------------+
  4. ``Input in0 0 1 in0``. No need to change it.
 .. caution::
  When you add a new layer ``SherpaMetaData``, please remember to update the
  number of layers. In our case, update  ``2028`` to ``2029``. Otherwise,
  you will be SAD later.
 .. hint::
  After adding the new layer ``SherpaMetaData``, you cannot use this model
  with ``streaming-ncnn-decode.py`` anymore since ``SherpaMetaData`` is
  supported only in `sherpa-ncnn`_.
 .. hint::
  `ncnn`_ is very flexible. You can add new layers to it just by text-editing
  the ``param`` file! You don't need to change the ``bin`` file.
 Now you can use this model in `sherpa-ncnn`_.
 Please refer to the following documentation:
  - Linux/macOS/Windows/arm/aarch64: `<https://k2-fsa.github.io/sherpa/ncnn/install/index.html>`_
  - ``Android``: `<https://k2-fsa.github.io/sherpa/ncnn/android/index.html>`_
  - ``iOS``: `<https://k2-fsa.github.io/sherpa/ncnn/ios/index.html>`_
  - Python: `<https://k2-fsa.github.io/sherpa/ncnn/python/index.html>`_
 We have a list of pre-trained models that have been exported for `sherpa-ncnn`_:
  - `<https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html>`_
    You can find more usages there.
--- a/docs/source/model-export/export-ncnn.rst
+++ b/docs/source/model-export/export-ncnn.rst
@ -1,12 +1,37 @@
 Export to ncnn
 ==============
-We support exporting LSTM transducer models to `ncnn <https://github.com/tencent/ncnn>`_.
+We support exporting the following models
 to `ncnn <https://github.com/tencent/ncnn>`_:
-Please refer to :ref:`export-model-for-ncnn` for details.
+  - `Zipformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_
-We also provide `<https://github.com/k2-fsa/sherpa-ncnn>`_
+  - `LSTM transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/lstm_transducer_stateless2>`_
-performing speech recognition using ``ncnn`` with exported models.
+
-It has been tested on Linux, macOS, Windows, and Raspberry Pi. The project is
+  - `ConvEmformer transducer models <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conv_emformer_transducer_stateless2>`_
-self-contained and can be statically linked to produce a binary containing
+
-everything needed.
+We also provide `sherpa-ncnn`_
 for performing speech recognition using `ncnn`_ with exported models.
 It has been tested on the following platforms:
  - Linux
  - macOS
  - Windows
  - ``Android``
  - ``iOS``
  - ``Raspberry Pi``
  - `爱芯派 <https://wiki.sipeed.com/hardware/zh/>`_ (`MAIX-III AXera-Pi <https://wiki.sipeed.com/hardware/en/maixIII/ax-pi/axpi.html>`_).
  - `RV1126 <https://www.rock-chips.com/a/en/products/RV11_Series/2020/0427/1076.html>`_
 `sherpa-ncnn`_ is self-contained and can be statically linked to produce
 a binary containing everything needed. Please refer
 to its documentation for details:
 - `<https://k2-fsa.github.io/sherpa/ncnn/index.html>`_
 .. toctree::
   export-ncnn-zipformer
   export-ncnn-conv-emformer
   export-ncnn-lstm
--- a/docs/source/model-export/export-onnx.rst
+++ b/docs/source/model-export/export-onnx.rst
@ -1,69 +1,95 @@
 Export to ONNX
 ==============
-In this section, we describe how to export models to ONNX.
+In this section, we describe how to export models to `ONNX`_.
 In each recipe, there is a file called ``export-onnx.py``, which is used
 to export trained models to `ONNX`_.
 There is also a file named ``onnx_pretrained.py``, which shows how to use
 the exported `ONNX`_ model in Python with `onnxruntime`_ to decode sound files.
 sherpa-onnx
 -----------
 We have a separate repository `sherpa-onnx`_ for deploying your exported models
 on various platforms such as:
  - iOS
  - Android
  - Raspberry Pi
  - Linux/macOS/Windows
 Please see the documentation of `sherpa-onnx`_ for details:
  `<https://k2-fsa.github.io/sherpa/onnx/index.html>`_
 Example
 -------
 In the following, we demonstrate how to export a streaming Zipformer pre-trained
 model from
 `<https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_
 to `ONNX`_.
 Download the pre-trained model
 ------------------------------
 .. hint::
-  Only non-streaming conformer transducer models are tested.
+   We assume you have installed `git-lfs`_.
 When to use it
 --------------
 If you want to use an inference framework that supports ONNX
 to run the pretrained model.
 How to export
 -------------
 We use
 `<https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless3>`_
 as an example in the following.
 .. code-block:: bash
    cd egs/librispeech/ASR
    epoch=14
    avg=2
-    ./pruned_transducer_stateless3/export.py \
+  cd egs/librispeech/ASR
      --exp-dir ./pruned_transducer_stateless3/exp \
      --bpe-model data/lang_bpe_500/bpe.model \
      --epoch $epoch \
      --avg $avg \
      --onnx 1
-It will generate the following files inside ``pruned_transducer_stateless3/exp``:
+  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
-  - ``encoder.onnx``
+  pushd $repo
-  - ``decoder.onnx``
+  git lfs pull --include "data/lang_bpe_500/bpe.model"
-  - ``joiner.onnx``
+  git lfs pull --include "exp/pretrained.pt"
-  - ``joiner_encoder_proj.onnx``
+  cd exp
-  - ``joiner_decoder_proj.onnx``
+  ln -s pretrained.pt epoch-99.pt
  popd
-You can use ``./pruned_transducer_stateless3/exp/onnx_pretrained.py`` to decode
+Export the model to ONNX
-waves with the generated files:
+------------------------
 .. code-block:: bash
-  ./pruned_transducer_stateless3/onnx_pretrained.py \
+  ./pruned_transducer_stateless7_streaming/export-onnx.py \
-    --bpe-model ./data/lang_bpe_500/bpe.model \
+    --bpe-model $repo/data/lang_bpe_500/bpe.model \
-    --encoder-model-filename ./pruned_transducer_stateless3/exp/encoder.onnx \
+    --use-averaged-model 0 \
-    --decoder-model-filename ./pruned_transducer_stateless3/exp/decoder.onnx \
+    --epoch 99 \
-    --joiner-model-filename ./pruned_transducer_stateless3/exp/joiner.onnx \
+    --avg 1 \
-    --joiner-encoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_encoder_proj.onnx \
+    --decode-chunk-len 32 \
-    --joiner-decoder-proj-model-filename ./pruned_transducer_stateless3/exp/joiner_decoder_proj.onnx \
+    --exp-dir $repo/exp/
    /path/to/foo.wav \
    /path/to/bar.wav \
    /path/to/baz.wav
 .. warning::
-How to use the exported model
+   ``export-onnx.py`` from different recipes has different options.
 -----------------------------
-We also provide `<https://github.com/k2-fsa/sherpa-onnx>`_
+   In the above example, ``--decode-chunk-len`` is specific for the
-performing speech recognition using `onnxruntime <https://github.com/microsoft/onnxruntime>`_
+   streaming Zipformer. Other models won't have such an option.
-with exported models.
+
-It has been tested on Linux, macOS, and Windows.
+It will generate the following 3 files in ``$repo/exp``
  - ``encoder-epoch-99-avg-1.onnx``
  - ``decoder-epoch-99-avg-1.onnx``
  - ``joiner-epoch-99-avg-1.onnx``
 Decode sound files with exported ONNX models
 --------------------------------------------
 .. code-block:: bash
  ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    $repo/test_wavs/1089-134686-0001.wav
--- a/docs/source/model-export/export-with-torch-jit-script.rst
+++ b/docs/source/model-export/export-with-torch-jit-script.rst
@ -1,7 +1,7 @@
 .. _export-model-with-torch-jit-script:
 Export model with torch.jit.script()
-===================================
+====================================
 In this section, we describe how to export a model via
 ``torch.jit.script()``.
--- a/docs/source/recipes/Non-streaming-ASR/aishell/conformer_ctc.rst
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/conformer_ctc.rst
@ -703,7 +703,7 @@ It will show you the following message:
 HLG decoding
-^^^^^^^^^^^^
+~~~~~~~~~~~~
 .. code-block:: bash
--- a/docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-conformer-ctc-tensorboard-log.jpg
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-conformer-ctc-tensorboard-log.jpg
--- a/docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg
--- a/docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-transducer_stateless_modified-tensorboard-log.png
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-transducer_stateless_modified-tensorboard-log.png
--- a/docs/source/recipes/Non-streaming-ASR/aishell/index.rst
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/index.rst
@ -19,4 +19,3 @@ It can be downloaded from `<https://www.openslr.org/33/>`_
   tdnn_lstm_ctc
   conformer_ctc
   stateless_transducer
--- a/docs/source/recipes/Non-streaming-ASR/aishell/stateless_transducer.rst
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/stateless_transducer.rst
--- a/docs/source/recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst
+++ b/docs/source/recipes/Non-streaming-ASR/aishell/tdnn_lstm_ctc.rst
--- a/docs/source/recipes/Non-streaming-ASR/index.rst
+++ b/docs/source/recipes/Non-streaming-ASR/index.rst
@ -0,0 +1,10 @@
 Non Streaming ASR
 =================
 .. toctree::
   :maxdepth: 2
   aishell/index
   librispeech/index
   timit/index
   yesno/index
--- a/docs/source/recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst
+++ b/docs/source/recipes/Non-streaming-ASR/librispeech/conformer_ctc.rst
@ -888,7 +888,7 @@ It will show you the following message:
 CTC decoding
-^^^^^^^^^^^^
+~~~~~~~~~~~~
 .. code-block:: bash
@ -926,7 +926,7 @@ Its output is:
  YET THESE THOUGHTS AFFECTED HESTER PRYNNE LESS WITH HOPE THAN APPREHENSION
 HLG decoding
-^^^^^^^^^^^^
+~~~~~~~~~~~~
 .. code-block:: bash
@ -966,7 +966,7 @@ The output is:
 HLG decoding + n-gram LM rescoring
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. code-block:: bash
@ -1012,7 +1012,7 @@ The output is:
 HLG decoding + n-gram LM rescoring + attention decoder rescoring
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. code-block:: bash
--- a/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst
+++ b/docs/source/recipes/Non-streaming-ASR/librispeech/distillation.rst
@ -0,0 +1,223 @@
 Distillation with HuBERT
 ========================
 This tutorial shows you how to perform knowledge distillation in `icefall`_
 with the `LibriSpeech`_ dataset. The distillation method
 used here is called "Multi Vector Quantization Knowledge Distillation" (MVQ-KD).
 Please have a look at our paper `Predicting Multi-Codebook Vector Quantization Indexes for Knowledge Distillation <https://arxiv.org/abs/2211.00508>`_
 for more details about MVQ-KD.
 .. note::
    This tutorial is based on recipe
    `pruned_transducer_stateless4 <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless4>`_.
    Currently, we only implement MVQ-KD in this recipe. However, MVQ-KD is theoretically applicable to all recipes
    with only minor changes needed. Feel free to try out MVQ-KD in different recipes. If you
    encounter any problems, please open an issue here `icefall <https://github.com/k2-fsa/icefall/issues>`_.
 .. note::
  We assume you have read the page :ref:`install icefall` and have set up
  the environment for `icefall`_.
 .. HINT::
  We recommend that you use a GPU or several GPUs to run this recipe.
 Data preparation
 ----------------
 We first prepare necessary training data for `LibriSpeech`_.
 This is the same as in :ref:`non_streaming_librispeech_pruned_transducer_stateless`.
 .. hint::
   The data preparation is the same as in other recipes on the LibriSpeech dataset.
   If you have finished this step, you can skip to :ref:`codebook_index_preparation` directly.
 .. code-block:: bash
  $ cd egs/librispeech/ASR
  $ ./prepare.sh
 The script ``./prepare.sh`` handles the data preparation for you, **automagically**.
 All you need to do is to run it.
 The data preparation contains several stages, you can use the following two
 options:
  - ``--stage``
  - ``--stop-stage``
 to control which stage(s) should be run. By default, all stages are executed.
 For example,
 .. code-block:: bash
  $ cd egs/librispeech/ASR
  $ ./prepare.sh --stage 0 --stop-stage 0 # run only stage 0
  $ ./prepare.sh --stage 2 --stop-stage 5 # run from stage 2 to stage 5
 .. HINT::
  If you have pre-downloaded the `LibriSpeech`_
  dataset and the `musan`_ dataset, say,
  they are saved in ``/tmp/LibriSpeech`` and ``/tmp/musan``, you can modify
  the ``dl_dir`` variable in ``./prepare.sh`` to point to ``/tmp`` so that
  ``./prepare.sh`` won't re-download them.
 .. NOTE::
  All generated files by ``./prepare.sh``, e.g., features, lexicon, etc,
  are saved in ``./data`` directory.
 We provide the following YouTube video showing how to run ``./prepare.sh``.
 .. note::
   To get the latest news of `next-gen Kaldi <https://github.com/k2-fsa>`_, please subscribe to
   the following YouTube channel by `Nadira Povey <https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_:
      `<https://www.youtube.com/channel/UC_VaumpkmINz1pNkFXAN9mw>`_
 ..  youtube:: ofEIoJL-mGM
 .. _codebook_index_preparation:
 Codebook index preparation
 --------------------------
 Here, we prepare necessary data for MVQ-KD. This requires the generation
 of codebook indexes (please read our `paper <https://arxiv.org/abs/2211.00508>`_
 if you are interested in details). In this tutorial, we use the pre-computed
 codebook indexes for convenience. The only thing you need to do is to
 run `./distillation_with_hubert.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/distillation_with_hubert.sh>`_.
 .. note::
  There are 5 stages in total. The first and second stages will be automatically skipped
  when you choose to download the codebook indexes prepared by `icefall`_.
  Of course, you can extract and compute the codebook indexes by yourself. This
  will require you to download a HuBERT-XL model, and the extraction of
  codebook indexes can take a while.
 As usual, you can control the stages you want to run by specifying the following
 two options:
  - ``--stage``
  - ``--stop-stage``
 For example,
 .. code-block:: bash
  $ cd egs/librispeech/ASR
  $ ./distillation_with_hubert.sh --stage 0 --stop-stage 0 # run only stage 0
  $ ./distillation_with_hubert.sh --stage 2 --stop-stage 4 # run from stage 2 to stage 4
 Here are a few options in `./distillation_with_hubert.sh <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/distillation_with_hubert.sh>`_
 you need to know before you proceed.
 - ``--full_libri`` If True, use full 960h data. Otherwise only ``train-clean-100`` will be used
 - ``--use_extracted_codebook`` If True, the first two stages will be skipped and the codebook
  indexes uploaded by us will be downloaded.
 Since we are using the pre-computed codebook indexes, we set
 ``use_extracted_codebook=True``. If you want to do full `LibriSpeech`_
 experiments, please set ``full_libri=True``.
 The following command downloads the pre-computed codebook indexes
 and prepares MVQ-augmented training manifests.
 .. code-block:: bash
  $ ./distillation_with_hubert.sh --stage 2 --stop-stage 2 # run only stage 2
 Please see the
 following screenshot for the output of an example execution.
 .. figure:: ./images/distillation_codebook.png
  :width: 800
  :alt: Downloading codebook indexes and preparing training manifest.
  :align: center
  Downloading codebook indexes and preparing training manifest.
 .. hint::
  The codebook indexes we prepared for you in this tutorial
  are extracted from the 36-th layer of a fine-tuned HuBERT-XL model
  with 8 codebooks. If you want to try other configurations, please
  set ``use_extracted_codebook=False`` and set ``embedding_layer`` and
  ``num_codebooks`` by yourself.
 Now, you should see the following files under the directory ``./data/vq_fbank_layer36_cb8``.
 .. figure:: ./images/distillation_directory.png
  :width: 800
  :alt: MVQ-augmented training manifests
  :align: center
  MVQ-augmented training manifests.
 Voilà! You are ready to perform knowledge distillation training now!
 Training
 --------
 To perform training, please run stage 3 by executing the following command.
 .. code-block:: bash
  $ ./distillation_with_hubert.sh --stage 3 --stop-stage 3 # run MVQ training
 Here is the code snippet for training:
 .. code-block:: bash
  WORLD_SIZE=$(echo ${CUDA_VISIBLE_DEVICES} | awk '{n=split($1, _, ","); print n}')
  ./pruned_transducer_stateless6/train.py \
    --manifest-dir ./data/vq_fbank_layer36_cb8 \
    --master-port 12359 \
    --full-libri $full_libri \
    --spec-aug-time-warp-factor -1 \
    --max-duration 300 \
    --world-size ${WORLD_SIZE} \
    --num-epochs 30 \
    --exp-dir $exp_dir \
    --enable-distillation True \
    --codebook-loss-scale 0.01
 There are a few arguments in the training command above
 that you should pay attention to.
  - ``--enable-distillation`` If True, knowledge distillation training is enabled.
  - ``--codebook-loss-scale`` The scale of the knowledge distillation loss.
  - ``--manifest-dir`` The path to the MVQ-augmented manifest.
 Decoding
 --------
 After training has finished, you can test the performance using
 the following command.
 .. code-block:: bash
  export CUDA_VISIBLE_DEVICES=0
  ./pruned_transducer_stateless6/decode.py \
    --decoding-method "modified_beam_search" \
    --epoch 30 \
    --avg 10 \
    --max-duration 200 \
    --exp-dir $exp_dir \
    --enable-distillation True
 You should get similar results as `here <https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS-100hours.md#distillation-with-hubert>`_.
 That's all! Feel free to experiment with your own setups and report your results.
 If you encounter any problems during training, please open up an issue `here <https://github.com/k2-fsa/icefall/issues>`_.
--- a/Show More
+++ b/Show More
`@ -1 +1 @@`
	<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: >= v1.9"><title>k2: >= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= v1.9</text></g></svg>	<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="80" height="20" role="img" aria-label="k2: >= v1.9"><title>k2: >= v1.9</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="80" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="23" height="20" fill="#555"/><rect x="23" width="57" height="20" fill="blueviolet"/><rect width="80" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="125" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="130">k2</text><text x="125" y="140" transform="scale(.1)" 
fill="#fff" textLength="130">k2</text><text aria-hidden="true" x="505" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="470">>= v1.9</text><text x="505" y="140" transform="scale(.1)" fill="#fff" textLength="470">>= v1.9</text></g></svg>