Fix transformer decoder layer (#1995 )

Musan implementation for ReazonSpeech (#1988 )
support left pad for make_pad_mask (#1990 )
2025-08-09 10:02:22 +00:00 · 2025-07-18 20:12:29 +08:00 · 2025-07-18 17:16:19 +08:00 · 2025-07-16 23:59:04 +08:00 · 2025-07-11 13:24:01 +08:00 · 2025-07-10 15:27:08 +08:00
3118 changed files with 630494 additions and 3466 deletions
--- a/.flake8
+++ b/.flake8
@ -1,11 +1,35 @@
 [flake8]
 show-source=true
 statistics=true
-max-line-length = 80
+max-line-length = 88
 per-file-ignores =
    # line too long
-    egs/librispeech/ASR/conformer_ctc/conformer.py: E501,
+    icefall/diagnostics.py: E501,
    egs/*/ASR/*/conformer.py: E501,
    egs/*/ASR/pruned_transducer_stateless*/*.py: E501,
    egs/*/ASR/*/optim.py: E501,
    egs/*/ASR/*/scaling.py: E501,
    egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
    egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
    egs/librispeech/ASR/conformer_ctc*/*py: E501,
    egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
    egs/librispeech/ASR/zipformer/*.py: E501, E203
    egs/librispeech/ASR/RESULTS.md: E999,
    egs/ljspeech/TTS/vits/*.py: E501, E203
    # invalid escape sequence (caused by TeX formulas), W605
    icefall/utils.py: E501, W605
 exclude =
  .git,
-  **/data/**
+  **/data/**,
  icefall/shared/make_kn_lm.py,
  icefall/__init__.py
  icefall/ctc/__init__.py
 ignore =
  # E203 white space before ":"
  E203,
  # W503 line break before binary operator
  W503,
  # E226 missing whitespace around arithmetic operator
  E226,
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@ -0,0 +1,3 @@
 # Migrate to 88 characters per line (see: https://github.com/lhotse-speech/lhotse/issues/890)
 107df3b115a58f1b68a6458c3f94a130004be34c
 d31db010371a4128856480382876acdc0d1739ed
--- a/.github/scripts/.gitignore
+++ b/.github/scripts/.gitignore
@ -0,0 +1 @@
 piper_phonemize.html
--- a/.github/scripts/aishell/ASR/run.sh
+++ b/.github/scripts/aishell/ASR/run.sh
@ -0,0 +1,343 @@
 #!/usr/bin/env bash
 set -ex
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. Backslash escapes in the message are interpreted.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 cd egs/aishell/ASR
 function download_test_dev_manifests() {
  # Clone the pre-computed fbank manifests for the aishell test/dev sets and
  # expose them as ./data (a symlink) in the current directory.
  local fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests
  git lfs install
  log "Downloading pre-commputed fbank from $fbank_url"
  # Clone from the URL that was just logged so the two can never diverge.
  git clone "$fbank_url"
  # Quote the path: an unquoted $PWD is word-split when the working directory
  # contains whitespace, which yields a broken link.
  ln -s "$PWD/aishell-test-dev-manifests/data" .
 }
 function test_transducer_stateless3_2022_06_20() {
  # Smoke-test the pruned_transducer_stateless3 model from 2022-06-20: run
  # pretrained.py with every search method and, for scheduled runs or events
  # labelled "run-decode", additionally run decode.py. The clone is removed
  # at the end.
  repo_url=https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20
  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  # Sample utterances passed to every pretrained.py invocation below.
  local wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  # Alias the checkpoint as pretrained.pt, the name given to --checkpoint below.
  pushd "$repo/exp"
  ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt pretrained.pt
  popd
  log "test greedy_search with pretrained.py"
  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"
    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done
  log "test beam search with pretrained.py"
  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"
    ./pruned_transducer_stateless3/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$repo/data/lang_char" \
      "${wavs[@]}"
  done
  echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
  echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
  if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
    # decode.py is pointed at the exp dir with --epoch 999, so expose the
    # checkpoint there as epoch-999.pt and link the lang dir under data/.
    mkdir -p pruned_transducer_stateless3/exp
    ln -s "$PWD/$repo/exp/pretrained.pt" pruned_transducer_stateless3/exp/epoch-999.pt
    ln -s "$PWD/$repo/data/lang_char" data/
    ls -lh data
    ls -lh pruned_transducer_stateless3/exp
    log "Decoding test and dev"
    # use a small value for decoding with CPU
    max_duration=100
    for method in greedy_search fast_beam_search modified_beam_search; do
      log "Decoding with $method"
      ./pruned_transducer_stateless3/decode.py \
        --decoding-method "$method" \
        --epoch 999 \
        --avg 1 \
        --max-duration "$max_duration" \
        --exp-dir pruned_transducer_stateless3/exp
    done
    rm pruned_transducer_stateless3/exp/*.pt
  fi
  rm -rf "$repo"
 }
 function test_zipformer_large_2023_10_24() {
  # Run zipformer/pretrained.py on the "large" aishell zipformer model
  # (2023-10-24) with each search method, then delete the clone.
  log "CI testing large model"
  repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-large-2023-10-24/
  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  # Encoder configuration passed to pretrained.py for this model size.
  local arch_args=(
    --num-encoder-layers 2,2,4,5,4,2
    --feedforward-dim 512,768,1536,2048,1536,768
    --encoder-dim 192,256,512,768,512,256
    --encoder-unmasked-dim 192,192,256,320,256,192
  )
  local wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  for method in modified_beam_search greedy_search fast_beam_search; do
    log "$method"
    ./zipformer/pretrained.py \
      --method "$method" \
      --context-size 1 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_char/tokens.txt" \
      "${arch_args[@]}" \
      "${wavs[@]}"
  done
  rm -rf "$repo"
 }
 function test_zipformer_2023_10_24() {
  # Run zipformer/pretrained.py on the aishell zipformer model (2023-10-24)
  # with each search method, then delete the clone. No encoder-size options
  # are passed for this model.
  repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-2023-10-24/
  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  local wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  for method in modified_beam_search greedy_search fast_beam_search; do
    log "$method"
    ./zipformer/pretrained.py \
      --method "$method" \
      --context-size 1 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_char/tokens.txt" \
      "${wavs[@]}"
  done
  rm -rf "$repo"
 }
 function test_zipformer_small_2023_10_24() {
  # Run zipformer/pretrained.py on the "small" aishell zipformer model
  # (2023-10-24) with each search method, then delete the clone.
  log "CI testing small model"
  repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-small-2023-10-24/
  log "Downloading pre-trained model from $repo_url"
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  # Encoder configuration passed to pretrained.py for this model size.
  local arch_args=(
    --num-encoder-layers 2,2,2,2,2,2
    --feedforward-dim 512,768,768,768,768,768
    --encoder-dim 192,256,256,256,256,256
    --encoder-unmasked-dim 192,192,192,192,192,192
  )
  local wavs=(
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  for method in modified_beam_search greedy_search fast_beam_search; do
    log "$method"
    ./zipformer/pretrained.py \
      --method "$method" \
      --context-size 1 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_char/tokens.txt" \
      "${arch_args[@]}" \
      "${wavs[@]}"
  done
  rm -rf "$repo"
 }
 function test_transducer_stateless_modified_2022_03_01() {
  # Run transducer_stateless_modified/pretrained.py on the 2022-03-01 model:
  # greedy search with 1-3 symbols per frame, then the two beam-search
  # methods. The clone is deleted afterwards.
  repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01
  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  # Model location and input utterances shared by every invocation below.
  local shared_args=(
    --checkpoint "$repo/exp/pretrained.pt"
    --lang-dir "$repo/data/lang_char"
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"
    ./transducer_stateless_modified/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      "${shared_args[@]}"
  done
  for method in modified_beam_search beam_search; do
    log "$method"
    ./transducer_stateless_modified/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      "${shared_args[@]}"
  done
  rm -rf "$repo"
 }
 function test_transducer_stateless_modified_2_2022_03_01() {
  # Run transducer_stateless_modified-2/pretrained.py on the 2022-03-01 model:
  # greedy search with 1-3 symbols per frame, then the two beam-search
  # methods. The clone is deleted afterwards.
  repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01
  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")
  # Model location and input utterances shared by every invocation below.
  local shared_args=(
    --checkpoint "$repo/exp/pretrained.pt"
    --lang-dir "$repo/data/lang_char"
    "$repo/test_wavs/BAC009S0764W0121.wav"
    "$repo/test_wavs/BAC009S0764W0122.wav"
    "$repo/test_wavs/BAC009S0764W0123.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"
    ./transducer_stateless_modified-2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      "${shared_args[@]}"
  done
  for method in modified_beam_search beam_search; do
    log "$method"
    ./transducer_stateless_modified-2/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      "${shared_args[@]}"
  done
  rm -rf "$repo"
 }
 function test_conformer_ctc() {
  # Export the aishell conformer_ctc model to TorchScript and decode three
  # sample wavs with the H, HL and HLG graphs in turn. The clone is deleted
  # afterwards.
  repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
  log "Downloading pre-trained model from $repo_url"
  # Clone without LFS payloads, then pull only the files used below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")
  pushd "$repo"
  local lfs_file
  for lfs_file in \
    exp/pretrained.pt \
    data/lang_char/H.fst \
    data/lang_char/HL.fst \
    data/lang_char/HLG.fst; do
    git lfs pull --include "$lfs_file"
  done
  popd
  local lang_dir=$repo/data/lang_char
  local wavs=(
    "$repo/test_wavs/0.wav"
    "$repo/test_wavs/1.wav"
    "$repo/test_wavs/2.wav"
  )
  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav
  log "CTC decoding"
  log "Exporting model with torchscript"
  # export.py is called with --epoch 99, so alias the checkpoint accordingly.
  pushd "$repo/exp"
  ln -s pretrained.pt epoch-99.pt
  popd
  ./conformer_ctc/export.py \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp" \
    --tokens "$lang_dir/tokens.txt" \
    --jit 1
  ls -lh "$repo/exp"
  ls -lh "$lang_dir"
  log "Decoding with H on CPU with OpenFst"
  ./conformer_ctc/jit_pretrained_decode_with_H.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --H "$lang_dir/H.fst" \
    --tokens "$lang_dir/tokens.txt" \
    "${wavs[@]}"
  log "Decoding with HL on CPU with OpenFst"
  ./conformer_ctc/jit_pretrained_decode_with_HL.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --HL "$lang_dir/HL.fst" \
    --words "$lang_dir/words.txt" \
    "${wavs[@]}"
  log "Decoding with HLG on CPU with OpenFst"
  ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --HLG "$lang_dir/HLG.fst" \
    --words "$lang_dir/words.txt" \
    "${wavs[@]}"
  rm -rf "$repo"
 }
 download_test_dev_manifests
 test_transducer_stateless3_2022_06_20
 test_zipformer_large_2023_10_24
 test_zipformer_2023_10_24
 test_zipformer_small_2023_10_24
 test_transducer_stateless_modified_2022_03_01
 test_transducer_stateless_modified_2_2022_03_01
 # test_conformer_ctc # fails for torch 1.13.x and torch 2.0.x
--- a/.github/scripts/audioset/AT/run.sh
+++ b/.github/scripts/audioset/AT/run.sh
@ -0,0 +1,94 @@
 #!/usr/bin/env bash
 set -ex
 python3 -m pip install onnxoptimizer onnxsim
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. Backslash escapes in the message are interpreted.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 cd egs/audioset/AT
 function test_pretrained() {
  # End-to-end check of the AudioSet audio-tagging zipformer model:
  #   1. run the PyTorch checkpoint on the sample wavs,
  #   2. export to TorchScript and run the scripted model,
  #   3. export to ONNX and run BOTH model.onnx and model.int8.onnx,
  #   4. copy the ONNX models, data files and test wavs to /icefall/model-onnx
  #      for uploading to huggingface.
  repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
  repo=$(basename "$repo_url")
  # Clone without LFS payloads and fetch only the checkpoint.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  pushd "$repo/exp"
  git lfs pull --include pretrained.pt
  # The export scripts are called with --epoch 99, so alias the checkpoint.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd
  log "test pretrained.pt"
  python3 zipformer/pretrained.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --label-dict "$repo/data/class_labels_indices.csv" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/2.wav" \
    "$repo/test_wavs/3.wav" \
    "$repo/test_wavs/4.wav"
  log "test jit export"
  ls -lh "$repo/exp/"
  python3 zipformer/export.py \
      --exp-dir "$repo/exp" \
      --epoch 99 \
      --avg 1 \
      --use-averaged-model 0 \
      --jit 1
  ls -lh "$repo/exp/"
  log "test jit models"
  python3 zipformer/jit_pretrained.py \
      --nn-model-filename "$repo/exp/jit_script.pt" \
      --label-dict "$repo/data/class_labels_indices.csv" \
      "$repo/test_wavs/1.wav" \
      "$repo/test_wavs/2.wav" \
      "$repo/test_wavs/3.wav" \
      "$repo/test_wavs/4.wav"
  log "test onnx export"
  ls -lh "$repo/exp/"
  python3 zipformer/export-onnx.py \
      --exp-dir "$repo/exp" \
      --epoch 99 \
      --avg 1 \
      --use-averaged-model 0
  ls -lh "$repo/exp/"
  pushd "$repo/exp/"
  mv model-epoch-99-avg-1.onnx model.onnx
  mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
  popd
  ls -lh "$repo/exp/"
  log "test onnx models"
  for m in model.onnx model.int8.onnx; do
    log "$m"
    # Use the loop variable: previously model.onnx was hard-coded here, so
    # the int8 model was logged but never actually run.
    python3 zipformer/onnx_pretrained.py \
        --model-filename "$repo/exp/$m" \
        --label-dict "$repo/data/class_labels_indices.csv" \
        "$repo/test_wavs/1.wav" \
        "$repo/test_wavs/2.wav" \
        "$repo/test_wavs/3.wav" \
        "$repo/test_wavs/4.wav"
  done
  log "prepare data for uploading to huggingface"
  dst=/icefall/model-onnx
  mkdir -p "$dst"
  cp -v "$repo"/exp/*.onnx "$dst/"
  cp -v "$repo"/data/* "$dst/"
  cp -av "$repo/test_wavs" "$dst"
  ls -lh "$dst"
  ls -lh "$dst/test_wavs"
 }
 test_pretrained
--- a/.github/scripts/baker_zh/TTS/run-matcha.sh
+++ b/.github/scripts/baker_zh/TTS/run-matcha.sh
@ -0,0 +1,167 @@
 #!/usr/bin/env bash
 set -ex
 apt-get update
 apt-get install -y sox
 python3 -m pip install numba conformer==0.3.2 diffusers librosa
 python3 -m pip install jieba
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet. Backslash escapes in the message are interpreted.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 cd egs/baker_zh/TTS
 sed -i.bak s/600/8/g ./prepare.sh
 sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
 sed -i.bak s/500/5/g ./prepare.sh
 git diff
 function prepare_data() {
  # Download the small BZNSYP sample set created for testing, unpack it as
  # download/BZNSYP, then run the recipe's prepare.sh and list the result.
  local archive=BZNSYP-samples.tar.bz2
  mkdir -p download
  pushd download
  wget -q "https://huggingface.co/csukuangfj/tmp-files/resolve/main/${archive}"
  tar xvf "${archive}"
  # The archive unpacks to BZNSYP-samples; rename it to BZNSYP.
  mv "${archive%.tar.bz2}" BZNSYP
  rm "${archive}"
  popd
  ./prepare.sh
  tree .
 }
 function train() {
  # Replace every "1500" in matcha/train.py with "3" (presumably to keep the
  # CI run short -- confirm against train.py), show the diff, then train for
  # one epoch and list the experiment directory.
  pushd ./matcha
  sed -i.bak s/1500/3/g ./train.py
  git diff .
  popd
  local train_args=(
    --exp-dir matcha/exp
    --num-epochs 1
    --save-every-n 1
    --num-buckets 2
    --tokens data/tokens.txt
    --max-duration 20
  )
  ./matcha/train.py "${train_args[@]}"
  ls -lh matcha/exp
 }
 function infer() {
  # Synthesize one sentence with matcha/infer.py (--epoch 1) using the
  # downloaded generator_v2 vocoder, inspect the wav with soxi, then remove
  # both the wav and the vocoder file.
  local vocoder=generator_v2
  local text="当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。"
  curl -SL -O "https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/${vocoder}"
  ./matcha/infer.py \
    --num-buckets 2 \
    --epoch 1 \
    --exp-dir ./matcha/exp \
    --tokens data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json \
    --vocoder "./${vocoder}" \
    --input-text "${text}" \
    --output-wav ./generated.wav
  ls -lh *.wav
  soxi ./generated.wav
  rm -v ./generated.wav
  rm -v "${vocoder}"
 }
 function export_onnx() {
  # Export the Matcha acoustic model to ONNX, synthesize a sample with each
  # HiFi-GAN ONNX vocoder, and package the model files under /icefall.
  #
  # Download the epoch-2000.pt checkpoint into matcha/exp.
  pushd matcha/exp
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
  popd
  # Remove the existing *.json files in data/fbank and download cmvn.json
  # from the same model repo as the checkpoint.
  pushd data/fbank
  rm -v *.json
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
  popd
  ./matcha/export_onnx.py \
    --exp-dir ./matcha/exp \
    --epoch 2000 \
    --tokens ./data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json
  ls -lh *.onnx
  # The first branch (exporting the HiFi-GAN vocoders locally) is disabled
  # on purpose; already-exported ONNX vocoders are downloaded instead.
  if false; then
    # The CI machine does not have enough memory to run it
    #
    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
    python3 ./matcha/export_onnx_hifigan.py
  else
    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
  fi
  ls -lh *.onnx
  python3 ./matcha/generate_lexicon.py
  # Synthesize the same sentence once per vocoder with model-steps-6.onnx;
  # the wavs are written directly to /icefall.
  for v in v1 v2 v3; do
    python3 ./matcha/onnx_pretrained.py \
     --acoustic-model ./model-steps-6.onnx \
     --vocoder ./hifigan_$v.onnx \
     --tokens ./data/tokens.txt \
     --lexicon ./lexicon.txt \
     --input-text "当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔。" \
     --output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
  done
  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
  cp ./model-steps-*.onnx /icefall
  # Assemble a package directory: tokens, lexicon, one acoustic model, the
  # jieba dictionary archive, the *.fst files and a README.
  d=matcha-icefall-zh-baker
  mkdir $d
  cp -v data/tokens.txt $d
  cp -v lexicon.txt $d
  # NOTE(review): the package ships model-steps-3.onnx although the synthesis
  # check above used model-steps-6.onnx -- confirm this is intended.
  cp model-steps-3.onnx $d
  pushd $d
  curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
  tar xvf dict.tar.bz2
  rm dict.tar.bz2
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
 cat >README.md <<EOF
 # Introduction
 This model is trained using the dataset from
 https://en.data-baker.com/datasets/freeDatasets/
 The dataset contains 10000 Chinese sentences of a native Chinese female speaker,
 which is about 12 hours.
 **Note**: The dataset is for non-commercial use only.
 You can find the training code at
 https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
 EOF
  ls -lh
  popd
  # Ship both the tarball and the unpacked directory.
  tar cvjf $d.tar.bz2 $d
  mv $d.tar.bz2 /icefall
  mv $d /icefall
 }
 prepare_data
 train
 infer
 export_onnx
 rm -rfv generator_v* matcha/exp
 git checkout .
--- a/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
+++ b/.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
@ -0,0 +1,19 @@
 #!/usr/bin/env bash
 # This script computes fbank features for the test-clean and test-other datasets.
 # The computed features are saved to ~/tmp/fbank-libri and are
 # cached for later runs
 set -e
 # Make the repository root importable for the feature-extraction script.
 export PYTHONPATH="$PWD:$PYTHONPATH"
 echo "$PYTHONPATH"
 # -p creates ~/tmp when it is missing and does not fail when the feature
 # directory already exists (the header says it is cached between runs).
 mkdir -p ~/tmp/fbank-libri
 cd egs/librispeech/ASR
 mkdir -p data
 cd data
 # Link the feature directory into the recipe unless "fbank" already exists.
 if [ ! -e fbank ]; then
  ln -s ~/tmp/fbank-libri fbank
 fi
 cd ..
 ./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
 ls -lh data/fbank/
--- a/.github/scripts/docker/Dockerfile
+++ b/.github/scripts/docker/Dockerfile
@ -0,0 +1,75 @@
 ARG PYTHON_VERSION=3.8
 FROM python:${PYTHON_VERSION}
 ARG TORCHAUDIO_VERSION="0.13.0"
 ARG TORCH_VERSION="1.13.0"
 ARG K2_VERSION="1.24.4.dev20231220"
 ARG KALDIFEAT_VERSION="1.25.3.dev20231221"
 ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}"
 ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}"
 RUN apt-get update -y && \
    apt-get install -qq -y \
    cmake \
    ffmpeg \
    git \
    git-lfs \
    graphviz \
    less \
    tree \
    vim \
    && \
    apt-get clean && \
    rm -rf /var/cache/apt/archives /var/lib/apt/lists
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${_K2_VERSION}
 LABEL kaldifeat_version=${_KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # Install dependencies
 # All Python packages are installed in one layer.
 # RUN uses the shell form, so requirement specifiers containing `<` or `>`
 # must be quoted; an unquoted `pkg>=x` is parsed as `pkg` plus a redirect of
 # stdout to a file named `=x`, which silently drops the version constraint.
 RUN pip install --no-cache-dir \
      torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
      torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
      k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
      \
      git+https://github.com/lhotse-speech/lhotse \
      kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
      conformer==0.3.2 \
      cython \
      diffusers \
      dill \
      espnet_tts_frontend \
      graphviz \
      kaldi-decoder \
      kaldi_native_io \
      kaldialign \
      kaldifst \
      kaldilm \
      librosa \
      "matplotlib<=3.9.4" \
      multi_quantization \
      numba \
      "numpy<2.0" \
      onnxoptimizer \
      onnxsim \
      onnx==1.17.0 \
      onnxmltools \
      onnxruntime==1.17.1 \
      piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
      pypinyin==0.50.0 \
      pytest \
      "sentencepiece>=0.1.96" \
      six \
      tensorboard \
      typeguard
 # RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
 #     cd /workspace/icefall && \
 #     pip install --no-cache-dir -r requirements.txt
 #
 # ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
 #
 # WORKDIR /workspace/icefall
--- a/.github/scripts/docker/generate_build_matrix.py
+++ b/.github/scripts/docker/generate_build_matrix.py
@ -0,0 +1,140 @@
 #!/usr/bin/env python3
 # Copyright    2023  Xiaomi Corp.        (authors: Fangjun Kuang)
 import argparse
 import json
 def get_args():
    """Parse the command-line options that restrict the generated build matrix.

    Every option is an optional string and is ``None`` when omitted.

    Returns:
      An ``argparse.Namespace`` with the attributes ``min_torch_version``,
      ``torch_version`` and ``python_version``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--min-torch-version",
        help="minimum torch version; "
        "older torch versions are left out of the matrix",
    )
    parser.add_argument(
        "--torch-version",
        help="build only for this torch version",
    )
    parser.add_argument(
        "--python-version",
        help="build only for this python version",
    )
    return parser.parse_args()
 def version_gt(a, b):
    """Return True if version ``a`` is strictly newer than ``b``.

    Only the major and minor components are compared; any further components
    (e.g. the patch number) are parsed but ignored. Both arguments are
    dot-separated strings with at least two integer components.
    """
    lhs_major, lhs_minor = [int(part) for part in a.split(".")][:2]
    rhs_major, rhs_minor = [int(part) for part in b.split(".")][:2]
    return (lhs_major, lhs_minor) > (rhs_major, rhs_minor)
 def version_ge(a, b):
    """Return True if version ``a`` is newer than or equal to ``b``.

    Only the major and minor components are compared; any further components
    (e.g. the patch number) are parsed but ignored. Both arguments are
    dot-separated strings with at least two integer components.
    """
    lhs_major, lhs_minor = [int(part) for part in a.split(".")][:2]
    rhs_major, rhs_minor = [int(part) for part in b.split(".")][:2]
    return (lhs_major, lhs_minor) >= (rhs_major, rhs_minor)
 def get_torchaudio_version(torch_version):
    """Return the torchaudio version string paired with ``torch_version``.

    Four torch releases map to a differently numbered torchaudio release;
    every other torch version is returned unchanged.
    """
    special_cases = {
        "1.13.0": "0.13.0",
        "1.13.1": "0.13.1",
        "2.0.0": "2.0.1",
        "2.0.1": "2.0.2",
    }
    return special_cases.get(torch_version, torch_version)
 def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
    """Build the list of (python, torch) combinations to build images for.

    Args:
      min_torch_version:
        If truthy, torch versions older than this (major.minor) are skipped.
      specified_torch_version:
        If truthy, the only torch version considered.
      specified_python_version:
        If truthy, the only python version considered.

    Returns:
      A list of dicts with the keys ``k2-version``, ``kaldifeat-version``,
      ``version``, ``python-version``, ``torch-version`` and
      ``torchaudio-version``, ordered by python version, then torch version.
    """
    k2_version = "1.24.4.dev20250630"
    kaldifeat_version = "1.25.5.dev20250630"
    version = "20250630"

    if specified_python_version:
        python_version = [specified_python_version]
    else:
        python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

    if specified_torch_version:
        torch_version = [specified_torch_version]
    else:
        torch_version = [
            "1.13.0",
            "1.13.1",
            "2.0.0",
            "2.0.1",
            "2.1.0",
            "2.1.1",
            "2.1.2",
            "2.2.0",
            "2.2.1",
            "2.2.2",
            "2.3.0",
            "2.3.1",
            "2.4.0",
            "2.4.1",
            "2.5.0",
            "2.5.1",
            "2.6.0",
            "2.7.0",
            "2.7.1",
        ]

    def is_excluded(p, t):
        # Checks are evaluated in this order and stop at the first hit.
        return (
            # torch older than the requested minimum
            (min_torch_version and version_gt(min_torch_version, t))
            # python > 3.10 is only paired with torch > 2.0
            or (version_gt(p, "3.10") and not version_gt(t, "2.0"))
            # python > 3.11 is only paired with torch > 2.1
            or (version_gt(p, "3.11") and not version_gt(t, "2.1"))
            # python > 3.12 is only paired with torch > 2.4
            or (version_gt(p, "3.12") and not version_gt(t, "2.4"))
            # torch > 2.4 is only paired with python >= 3.10
            or (version_gt(t, "2.4") and version_gt("3.10", p))
        )

    return [
        {
            "k2-version": k2_version,
            "kaldifeat-version": kaldifeat_version,
            "version": version,
            "python-version": p,
            "torch-version": t,
            "torchaudio-version": get_torchaudio_version(t),
        }
        for p in python_version
        for t in torch_version
        if not is_excluded(p, t)
    ]
 def main():
    """Print the build matrix as a JSON object of the form {"include": [...]}."""
    cli = get_args()
    payload = {
        "include": get_matrix(
            min_torch_version=cli.min_torch_version,
            specified_torch_version=cli.torch_version,
            specified_python_version=cli.python_version,
        )
    }
    print(json.dumps(payload))
 if __name__ == "__main__":
    main()
--- a/.github/scripts/download-gigaspeech-dev-test-dataset.sh
+++ b/.github/scripts/download-gigaspeech-dev-test-dataset.sh
@ -0,0 +1,17 @@
 #!/usr/bin/env bash
 # This script downloads the pre-computed fbank features for
 # dev and test datasets of GigaSpeech.
 #
 # You will find directories `~/tmp/giga-dev-dataset-fbank` after running
 # this script.
 set -e
 mkdir -p ~/tmp
 cd ~/tmp
 git lfs install
 git clone https://huggingface.co/csukuangfj/giga-dev-dataset-fbank
 ls -lh giga-dev-dataset-fbank/data/fbank
--- a/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
+++ b/.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
@ -0,0 +1,25 @@
 #!/usr/bin/env bash
 # This script downloads the test-clean and test-other datasets
 # of LibriSpeech and unzip them to the folder ~/tmp/download,
 # which is cached by GitHub actions for later runs.
 #
 # You will find directories ~/tmp/download/LibriSpeech after running
 # this script.
 set -e
 # -p creates ~/tmp when it is missing and does not fail when the download
 # directory already exists (the header says it is cached between runs).
 mkdir -p ~/tmp/download
 cd egs/librispeech/ASR
 # Expose the download directory inside the recipe as ./download.
 ln -s ~/tmp/download .
 cd download
 # Fetch and unpack each subset, removing the archive once extracted.
 for subset in test-clean test-other; do
  wget -q --no-check-certificate "https://www.openslr.org/resources/12/${subset}.tar.gz"
  tar xf "${subset}.tar.gz"
  rm "${subset}.tar.gz"
 done
 pwd
 ls -lh
 ls -lh LibriSpeech
--- a/.github/scripts/generate-piper-phonemize-page.py
+++ b/.github/scripts/generate-piper-phonemize-page.py
@ -0,0 +1,90 @@
 #!/usr/bin/env python3
 def get_v1_2_0_files():
    """Return the sorted download URLs of the piper_phonemize 1.2.0 wheels."""
    release_url = (
        "https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
    )
    # (python tag, ABI tag) pairs; only cp37 uses an ABI tag with an "m" suffix.
    interpreters = [
        ("cp37", "cp37m"),
        ("cp38", "cp38"),
        ("cp39", "cp39"),
        ("cp310", "cp310"),
        ("cp311", "cp311"),
        ("cp312", "cp312"),
    ]
    platforms = [
        "macosx_10_14_x86_64",
        "manylinux_2_17_x86_64.manylinux2014_x86_64",
    ]
    return sorted(
        f"{release_url}piper_phonemize-1.2.0-{py}-{abi}-{plat}.whl"
        for py, abi in interpreters
        for plat in platforms
    )
 def get_v1_3_0_files():
    """Return the sorted download URLs of the piper_phonemize 1.3.0 wheels.

    There are seven wheels per interpreter (three macOS, three manylinux and
    one Windows) for six interpreters, 42 URLs in total.
    """
    release_url = (
        "https://github.com/csukuangfj/piper-phonemize/releases/download/2025.06.23/"
    )
    urls = []
    for tag in ("cp38", "cp39", "cp310", "cp311", "cp312", "cp313"):
        # The cp312/cp313 macOS wheels are tagged 10_13, the others 10_9.
        mac = "macosx_10_13" if tag in ("cp312", "cp313") else "macosx_10_9"
        platforms = (
            f"{mac}_universal2",
            f"{mac}_x86_64",
            "macosx_11_0_arm64",
            "manylinux_2_17_aarch64.manylinux2014_aarch64",
            "manylinux_2_17_i686.manylinux2014_i686",
            "manylinux_2_17_x86_64.manylinux2014_x86_64",
            "win_amd64",
        )
        for plat in platforms:
            urls.append(f"{release_url}piper_phonemize-1.3.0-{tag}-{tag}-{plat}.whl")
    return sorted(urls)
 def main():
    """Write piper_phonemize.html: one link per wheel, 1.3.0 wheels first."""
    urls = get_v1_3_0_files() + get_v1_2_0_files()
    # The link text is the last path component of the URL, i.e. the wheel name.
    links = [f'<a href="{url}">{url.split("/")[-1]}</a><br/>\n' for url in urls]
    with open("piper_phonemize.html", "w") as f:
        f.writelines(links)
 if __name__ == "__main__":
    main()
--- a/.github/scripts/install-kaldifeat.sh
+++ b/.github/scripts/install-kaldifeat.sh
@ -0,0 +1,15 @@
 #!/usr/bin/env bash
 # This script installs kaldifeat into the directory ~/tmp/kaldifeat
 # which is cached by GitHub actions for later runs.
 #
 # It can be re-run on top of an existing checkout / build directory:
 # both are reused instead of making the script fail.
 set -e
 mkdir -p ~/tmp
 cd ~/tmp
 # `git clone` refuses to write into an existing non-empty directory, so
 # clone only when there is no checkout yet.
 if [[ ! -d kaldifeat/.git ]]; then
   git clone https://github.com/csukuangfj/kaldifeat
 fi
 cd kaldifeat
 # -p: do not fail when a previous run already created build/
 mkdir -p build
 cd build
 cmake -DCMAKE_BUILD_TYPE=Release ..
 make -j2 _kaldifeat
--- a/.github/scripts/ksponspeech/ASR/run.sh
+++ b/.github/scripts/ksponspeech/ASR/run.sh
@ -0,0 +1,132 @@
 #!/usr/bin/env bash
 set -ex
 log() {
  # Timestamped logger taken from espnet. The prefix holds the basename of
  # the calling script, the line of the call and the calling function.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S') || true
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 # The relative paths used below (./zipformer/...,
 # ./pruned_transducer_stateless7_streaming/...) are resolved from here.
 cd egs/ksponspeech/ASR
 function test_pretrained_non_streaming() {
  # Smoke test for the pre-trained non-streaming zipformer model:
  #   1. clone the model repo and download the test waves into it,
  #   2. decode the waves with zipformer/pretrained.py,
  #   3. export the checkpoint to ONNX and decode one wave with the result,
  #   4. copy the waves, ONNX models and lang files to /tmp/model-2024-06-24.
  # The cloned repo is removed at the end.
  local repo=icefall-asr-ksponspeech-zipformer-2024-06-24
  local wavs_url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs
  local dst=/tmp/model-2024-06-24
  local f
  git lfs install
  git clone "https://huggingface.co/johnBamma/$repo"
  pushd "$repo"
  mkdir test_wavs
  cd test_wavs
  # The test waves and their transcripts are fetched from a separate repo.
  for f in 0.wav 1.wav 2.wav 3.wav trans.txt; do
    curl -SL -O "$wavs_url/$f"
  done
  cd ../exp
  # The export below is run with --epoch 99, hence the epoch-99.pt alias.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd
  log 'test pretrained.py'
  ./zipformer/pretrained.py \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
      --method greedy_search \
      "$repo/test_wavs/0.wav" \
      "$repo/test_wavs/1.wav" \
      "$repo/test_wavs/2.wav" \
      "$repo/test_wavs/3.wav"
  log 'test export-onnx.py'
  ./zipformer/export-onnx.py \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp/"
  ls -lh "$repo/exp"
  ls -lh "$repo/data/lang_bpe_5000/"
  log 'test exported onnx models'
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    "$repo/test_wavs/0.wav"
  mkdir -p "$dst"
  cp -av "$repo/test_wavs" "$dst"
  cp -v "$repo"/exp/*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_5000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_5000/bpe.model" "$dst"
  rm -rf "$repo"
 }
 function test_pretrained_streaming() {
  # Smoke test for the pre-trained streaming model
  # (pruned_transducer_stateless7_streaming):
  #   1. clone the model repo and download the test waves into it,
  #   2. decode the waves with pretrained.py,
  #   3. export the checkpoint to ONNX and decode one wave with the result,
  #   4. copy the ONNX models and lang files to /tmp/model-2024-06-16.
  # The cloned repo is removed at the end.
  local repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
  local wavs_url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs
  local dst=/tmp/model-2024-06-16
  local f
  git lfs install
  git clone "https://huggingface.co/johnBamma/$repo"
  pushd "$repo"
  mkdir test_wavs
  cd test_wavs
  # The test waves are fetched from a separate repo.
  for f in 0.wav 1.wav 2.wav 3.wav; do
    curl -SL -O "$wavs_url/$f"
  done
  cd ../exp
  # The export below is run with --epoch 99, hence the epoch-99.pt alias.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd
  log 'test pretrained.py'
  ./pruned_transducer_stateless7_streaming/pretrained.py \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
      --method greedy_search \
      "$repo/test_wavs/0.wav" \
      "$repo/test_wavs/1.wav" \
      "$repo/test_wavs/2.wav" \
      "$repo/test_wavs/3.wav"
  log 'test export-onnx.py'
  ./pruned_transducer_stateless7_streaming/export-onnx.py \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --decode-chunk-len 32 \
    --exp-dir "$repo/exp/"
  ls -lh "$repo/exp"
  ls -lh "$repo/data/lang_bpe_5000/"
  log 'test exported onnx models'
  ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    "$repo/test_wavs/0.wav"
  mkdir -p "$dst"
  cp -v "$repo"/exp/*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_5000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_5000/bpe.model" "$dst"
  rm -rf "$repo"
 }
 # Run both smoke tests in order; with `set -e` in effect the script stops
 # at the first command that fails.
 test_pretrained_non_streaming
 test_pretrained_streaming
--- a/.github/scripts/librispeech/ASR/run.sh
+++ b/.github/scripts/librispeech/ASR/run.sh
--- a/.github/scripts/librispeech/ASR/run_rknn.sh
+++ b/.github/scripts/librispeech/ASR/run_rknn.sh
@ -0,0 +1,275 @@
 #!/usr/bin/env bash
 # -e: stop at the first failing command; -x: echo every command before it runs.
 set -ex
 # Python packages installed up front for the scripts invoked below.
 python3 -m pip install kaldi-native-fbank soundfile librosa
 log() {
  # Timestamped logger taken from espnet. The prefix holds the basename of
  # the calling script, the line of the call and the calling function.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S') || true
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 # The relative paths used below (./zipformer/...,
 # ./pruned_transducer_stateless7_streaming/...) are resolved from here.
 cd egs/librispeech/ASR
 # https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
 # sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
 #
 # Downloads the checkpoint, tokens and test waves, exports the model to
 # ONNX, decodes two waves with the ONNX models, then converts the models to
 # RKNN for every listed platform and moves one tarball per platform
 # to /icefall/.
 function export_2023_02_20() {
  local hf=https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main
  local item wav tarball
  local -a onnx_models

  d=exp_2023_02_20
  mkdir $d
  pushd $d
  curl -SL -O $hf/exp/pretrained.pt
  # export-onnx-zh.py is called with --epoch 99 below.
  mv pretrained.pt epoch-99.pt
  for item in data/lang_char_bpe/tokens.txt test_wavs/{0,1,2,3,4}.wav; do
    curl -SL -O $hf/$item
  done
  ls -lh
  popd

  ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens $d/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $d/ \
    --decode-chunk-len 64 \
    --num-encoder-layers "2,4,3,2,4" \
    --feedforward-dims "1024,1024,1536,1536,1024" \
    --nhead "8,8,8,8,8" \
    --encoder-dims "384,384,384,384,384" \
    --attention-dims "192,192,192,192,192" \
    --encoder-unmasked-dims "256,256,256,256,256" \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512
  ls -lh $d/

  # Decode the first two test waves with the exported ONNX models.
  onnx_models=(
    --encoder-model-filename $d/encoder-epoch-99-avg-1.onnx
    --decoder-model-filename $d/decoder-epoch-99-avg-1.onnx
    --joiner-model-filename $d/joiner-epoch-99-avg-1.onnx
  )
  for wav in 0.wav 1.wav; do
    ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
      "${onnx_models[@]}" \
      --tokens $d/tokens.txt \
      $d/$wav
  done

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
    tarball=$dst.tar.bz2
    mkdir -p $dst
    # stderr of the converter is discarded.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder $d/encoder-epoch-99-avg-1.onnx \
      --in-decoder $d/decoder-epoch-99-avg-1.onnx \
      --in-joiner $d/joiner-epoch-99-avg-1.onnx \
      --out-encoder $dst/encoder.rknn \
      --out-decoder $dst/decoder.rknn \
      --out-joiner $dst/joiner.rknn \
      --target-platform $platform  2>/dev/null
    ls -lh $dst/
    ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
      --encoder $d/encoder-epoch-99-avg-1.onnx \
      --decoder $d/decoder-epoch-99-avg-1.onnx \
      --joiner $d/joiner-epoch-99-avg-1.onnx \
      --tokens $d/tokens.txt \
      --wav $d/0.wav
    # Package the converted models together with the tokens and test waves.
    cp $d/tokens.txt $dst
    mkdir $dst/test_wavs
    cp $d/*.wav $dst/test_wavs
    tar cjvf $tarball $dst
    ls -lh $tarball
    mv $tarball /icefall/
    ls -lh $dst/
    echo "---"
    rm -rf $dst
  done
 }
 # https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
 # sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
 #
 # Downloads the checkpoint, tokens and test waves, exports the model to
 # ONNX, decodes two waves with the ONNX models, then converts the models to
 # RKNN for every listed platform and moves one tarball per platform
 # to /icefall/.
 function export_2023_02_16() {
  local hf=https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main
  local item wav tarball
  local -a onnx_models

  d=exp_2023_02_16
  mkdir $d
  pushd $d
  curl -SL -O $hf/exp/pretrained.pt
  # export-onnx-zh.py is called with --epoch 99 below.
  mv pretrained.pt epoch-99.pt
  for item in data/lang_char_bpe/tokens.txt test_wavs/{0,1,2,3,4}.wav; do
    curl -SL -O $hf/$item
  done
  ls -lh
  popd

  ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens $d/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $d/ \
    --decode-chunk-len 64 \
    --num-encoder-layers 2,2,2,2,2 \
    --feedforward-dims 768,768,768,768,768 \
    --nhead 4,4,4,4,4 \
    --encoder-dims 256,256,256,256,256 \
    --attention-dims 192,192,192,192,192 \
    --encoder-unmasked-dims 192,192,192,192,192 \
    --zipformer-downsampling-factors "1,2,4,8,2" \
    --cnn-module-kernels "31,31,31,31,31" \
    --decoder-dim 512 \
    --joiner-dim 512
  ls -lh $d/

  # Decode the first two test waves with the exported ONNX models.
  onnx_models=(
    --encoder-model-filename $d/encoder-epoch-99-avg-1.onnx
    --decoder-model-filename $d/decoder-epoch-99-avg-1.onnx
    --joiner-model-filename $d/joiner-epoch-99-avg-1.onnx
  )
  for wav in 0.wav 1.wav; do
    ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
      "${onnx_models[@]}" \
      --tokens $d/tokens.txt \
      $d/$wav
  done

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
    tarball=$dst.tar.bz2
    mkdir -p $dst
    # stderr of the converter is discarded.
    ./pruned_transducer_stateless7_streaming/export_rknn.py \
      --in-encoder $d/encoder-epoch-99-avg-1.onnx \
      --in-decoder $d/decoder-epoch-99-avg-1.onnx \
      --in-joiner $d/joiner-epoch-99-avg-1.onnx \
      --out-encoder $dst/encoder.rknn \
      --out-decoder $dst/decoder.rknn \
      --out-joiner $dst/joiner.rknn \
      --target-platform $platform  2>/dev/null
    ls -lh $dst/
    ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
      --encoder $d/encoder-epoch-99-avg-1.onnx \
      --decoder $d/decoder-epoch-99-avg-1.onnx \
      --joiner $d/joiner-epoch-99-avg-1.onnx \
      --tokens $d/tokens.txt \
      --wav $d/0.wav
    # Package the converted models together with the tokens and test waves.
    cp $d/tokens.txt $dst
    mkdir $dst/test_wavs
    cp $d/*.wav $dst/test_wavs
    tar cjvf $tarball $dst
    ls -lh $tarball
    mv $tarball /icefall/
    ls -lh $dst/
    echo "---"
    rm -rf $dst
  done
 }
 # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
 #
 # Downloads the checkpoint, tokens and three test waves, exports the
 # streaming zipformer transducer to ONNX, converts it to RKNN for every
 # listed platform and moves one tarball per platform to /icefall/.
 function export_2023_06_26() {
  d=exp_2023_06_26
  mkdir $d
  pushd $d
  curl -SL -O https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/exp/pretrained.pt
  # export-onnx-streaming.py is called with --epoch 99 below.
  mv pretrained.pt epoch-99.pt
  curl -SL -O https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/data/lang_bpe_500/tokens.txt
  # The test waves are renamed to 0.wav, 1.wav, 2.wav while downloading.
  # 0.wav must come from test_wavs/, not from the tokens.txt URL; otherwise
  # a text file is packaged as a wave by `cp $d/*.wav` below.
  curl -SL -o 0.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1089-134686-0001.wav
  curl -SL -o 1.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1221-135766-0001.wav
  curl -SL -o 2.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1221-135766-0002.wav
  ls -lh
  popd
  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens $d/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $d \
    --use-ctc 0 \
    --use-transducer 1 \
    \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1
  ls -lh $d/
  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-en-2023-06-26
    mkdir -p $dst
    # The ONNX file names carry the chunk size (32) and left context (128)
    # passed to the export above.
    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
      --in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
      --in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
      --out-encoder $dst/encoder.rknn \
      --out-decoder $dst/decoder.rknn \
      --out-joiner $dst/joiner.rknn \
      --target-platform $platform
    ls -lh $dst/
    # Package the converted models together with the tokens and test waves.
    cp $d/tokens.txt $dst
    mkdir $dst/test_wavs
    cp $d/*.wav $dst/test_wavs
    tar cjvf $dst.tar.bz2 $dst
    ls -lh $dst.tar.bz2
    mv $dst.tar.bz2 /icefall/
    ls -lh $dst/
    echo "---"
    rm -rf $dst
  done
 }
 # Choose the export set from $rknn_toolkit2_version. The variable is not
 # assigned in this script (presumably provided by the caller's environment
 # - confirm); any value other than 2.1.0, including unset, takes the
 # default branch.
 case "$rknn_toolkit2_version" in
  2.1.0)
    export_2023_02_16
    export_2023_02_20
    ;;
  *)
    export_2023_06_26
    ;;
 esac
--- a/.github/scripts/ljspeech/TTS/run-matcha.sh
+++ b/.github/scripts/ljspeech/TTS/run-matcha.sh
@ -0,0 +1,157 @@
 #!/usr/bin/env bash
 # -e: stop at the first failing command; -x: echo every command before it runs.
 set -ex
 # sox provides the `soxi` command used by infer() and export_onnx() below.
 apt-get update
 apt-get install -y sox
 # piper_phonemize is installed from the wheel index page given with -f.
 python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
 python3 -m pip install espnet_tts_frontend
 python3 -m pip install numba conformer==0.3.2 diffusers librosa
 log() {
  # Timestamped logger taken from espnet. The prefix holds the basename of
  # the calling script, the line of the call and the calling function.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S') || true
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 cd egs/ljspeech/TTS
 # Patch ./prepare.sh in place: every "600" becomes "8", every "first 100"
 # becomes "first 3" and every "500" becomes "5". Each call also writes a
 # prepare.sh.bak backup.
 # NOTE(review): presumably this shrinks the data preparation for CI - confirm.
 sed -i.bak s/600/8/g ./prepare.sh
 sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
 sed -i.bak s/500/5/g ./prepare.sh
 # Show the resulting modifications in the CI log.
 git diff
 function prepare_data() {
  # Fetch the LJSpeech subset that was created for testing, unpack it
  # under ./download, then run the recipe's ./prepare.sh.
  local archive_url=https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
  local archive=${archive_url##*/}

  mkdir -p download
  pushd download
  wget -q $archive_url
  tar xvf $archive
  popd

  ./prepare.sh
  tree .
 }
 function train() {
  # Patch matcha/train.py (every "1500" becomes "3"), show the patch, then
  # run a one-epoch training and list the experiment directory.
  local -a train_args=(
    --exp-dir matcha/exp
    --num-epochs 1
    --save-every-n 1
    --num-buckets 2
    --tokens data/tokens.txt
    --max-duration 20
  )

  pushd ./matcha
  sed -i.bak s/1500/3/g ./train.py
  git diff .
  popd

  ./matcha/train.py "${train_args[@]}"
  ls -lh matcha/exp
 }
 function infer() {
  # Synthesize one sentence with the epoch-1 checkpoint and the downloaded
  # generator_v1 vocoder, inspect the wave with soxi, then delete both the
  # wave and the vocoder.
  local vocoder=generator_v1
  local wav=./generated.wav

  curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/$vocoder
  ./matcha/infer.py \
    --num-buckets 2 \
    --epoch 1 \
    --exp-dir ./matcha/exp \
    --tokens data/tokens.txt \
    --vocoder ./$vocoder \
    --input-text "how are you doing?" \
    --output-wav $wav
  ls -lh *.wav
  soxi $wav
  rm -v $wav
  rm -v $vocoder
 }
 function export_onnx() {
  # Export the downloaded epoch-4000 matcha checkpoint to ONNX, synthesize
  # one sentence per HiFiGAN vocoder (v1, v2, v3) and package a model
  # directory. Results are written to /icefall.
  pushd matcha/exp
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
  popd
  pushd data/fbank
  # Remove the local *.json files before downloading the cmvn.json published
  # alongside the checkpoint.
  rm -fv *.json
  curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
  popd
  ./matcha/export_onnx.py \
    --exp-dir ./matcha/exp \
    --epoch 4000 \
    --tokens ./data/tokens.txt \
    --cmvn ./data/fbank/cmvn.json
  ls -lh *.onnx
  # The first branch is disabled on purpose (`if false`); the pre-exported
  # vocoders are downloaded instead.
  if false; then
    # The CI machine does not have enough memory to run it
    #
    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
    curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
    python3 ./matcha/export_onnx_hifigan.py
  else
    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
    curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
  fi
  ls -lh *.onnx
  # One wave per vocoder, all generated with the model-steps-6 acoustic model.
  for v in v1 v2 v3; do
    python3 ./matcha/onnx_pretrained.py \
     --acoustic-model ./model-steps-6.onnx \
     --vocoder ./hifigan_$v.onnx \
     --tokens ./data/tokens.txt \
     --input-text "how are you doing?" \
     --output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
  done
  ls -lh /icefall/*.wav
  soxi /icefall/generated-matcha-tts-steps-6-*.wav
  cp ./model-steps-*.onnx /icefall
  # Assemble the model directory: tokens, acoustic model, espeak-ng data
  # and a README.
  d=matcha-icefall-en_US-ljspeech
  mkdir $d
  cp -v data/tokens.txt $d
  # NOTE(review): the packaged model is model-steps-3.onnx, while the waves
  # above were generated with model-steps-6.onnx - confirm this is intended.
  cp model-steps-3.onnx $d
  pushd $d
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
 # Everything up to the EOF line is written verbatim to README.md.
 cat >README.md <<EOF
 # Introduction
 This model is trained using the dataset from
 https://keithito.com/LJ-Speech-Dataset/
 The dataset contains only 1 female speaker.
 You can find the training code at
 https://github.com/k2-fsa/icefall/tree/master/egs/ljspeech/TTS#matcha
 EOF
  ls -lh
  popd
  # Publish both the tarball and the unpacked directory.
  tar cvjf $d.tar.bz2 $d
  mv $d.tar.bz2 /icefall
  mv $d /icefall
 }
 # Run the whole pipeline; with `set -e` the script stops at the first failure.
 prepare_data
 train
 infer
 export_onnx
 # Clean up: remove generator_v* files and the experiment directory, then
 # restore the tracked files under the current directory (this undoes the
 # sed patches applied above).
 rm -rfv generator_v* matcha/exp
 git checkout .
--- a/.github/scripts/ljspeech/TTS/run.sh
+++ b/.github/scripts/ljspeech/TTS/run.sh
@ -0,0 +1,157 @@
 #!/usr/bin/env bash
 # -e: stop at the first failing command; -x: echo every command before it runs.
 set -ex
 # piper_phonemize is installed from the wheel index page given with -f.
 python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
 python3 -m pip install espnet_tts_frontend
 python3 -m pip install numba
 log() {
  # Timestamped logger taken from espnet. The prefix holds the basename of
  # the calling script, the line of the call and the calling function.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S') || true
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 cd egs/ljspeech/TTS
 # Patch ./prepare.sh in place: every "600" becomes "8", every "first 100"
 # becomes "first 3" and every "500" becomes "5". Each call also writes a
 # prepare.sh.bak backup.
 # NOTE(review): presumably this shrinks the data preparation for CI - confirm.
 sed -i.bak s/600/8/g ./prepare.sh
 sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
 sed -i.bak s/500/5/g ./prepare.sh
 # Show the resulting modifications in the CI log.
 git diff
 function prepare_data() {
  # Fetch the LJSpeech subset that was created for testing, unpack it
  # under ./download, then run the recipe's ./prepare.sh.
  local archive_url=https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
  local archive=${archive_url##*/}

  mkdir -p download
  pushd download
  wget -q $archive_url
  tar xvf $archive
  popd

  ./prepare.sh
  tree .
 }
 function train() {
  # Patch vits/train.py (every "200" becomes "3"), show the patch, then run
  # a one-epoch training for each model type into vits/exp-<type>.
  local -a shared_args=(
    --num-epochs 1
    --save-every-n 1
    --num-buckets 2
    --tokens data/tokens.txt
    --max-duration 20
  )

  pushd ./vits
  sed -i.bak s/200/3/g ./train.py
  git diff .
  popd

  for t in low medium high; do
    ./vits/train.py --exp-dir vits/exp-$t --model-type $t "${shared_args[@]}"
    ls -lh vits/exp-$t
  done
 }
 function infer() {
  # Run vits/infer.py on the epoch-1 checkpoint of every model type.
  local -a model_types=(low medium high)

  for t in "${model_types[@]}"; do
    ./vits/infer.py --num-buckets 2 --model-type $t --epoch 1 \
      --exp-dir ./vits/exp-$t --tokens data/tokens.txt --max-duration 20
  done
 }
 function export_onnx() {
  # Export the epoch-1 checkpoint of every model type to ONNX and list the
  # experiment directory afterwards.
  local -a model_types=(low medium high)

  for t in "${model_types[@]}"; do
    ./vits/export-onnx.py --model-type $t --epoch 1 \
      --exp-dir ./vits/exp-$t --tokens data/tokens.txt
    ls -lh vits/exp-$t/
  done
 }
 function test_medium() {
  # Export the released "medium" VITS checkpoint (epoch 820) to ONNX, run
  # test_onnx.py on it, and build
  # /icefall/vits-icefall-en_US-ljspeech-medium.tar.bz2 from the model,
  # the tokens and the espeak-ng data.
  local name=icefall-tts-ljspeech-vits-medium-2024-03-12
  local pkg=vits-icefall-en_US-ljspeech-medium

  git clone https://huggingface.co/csukuangfj/$name
  ./vits/export-onnx.py \
    --model-type medium \
    --epoch 820 \
    --exp-dir ./$name/exp \
    --tokens ./$name/data/tokens.txt
  ls -lh ./$name/exp
  ./vits/test_onnx.py \
    --model-filename ./$name/exp/vits-epoch-820.onnx \
    --tokens ./$name/data/tokens.txt \
    --output-filename /icefall/test-medium.wav
  ls -lh /icefall/test-medium.wav

  d=/icefall/$pkg
  mkdir $d
  cp -v ./$name/data/tokens.txt $d/
  cp -v ./$name/exp/vits-epoch-820.onnx $d/model.onnx
  rm -rf $name

  pushd $d
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
  # Step up to /icefall so that the archive contains the directory itself.
  cd ..
  tar cjf $pkg.tar.bz2 $pkg
  rm -rf $pkg
  ls -lh *.tar.bz2
  popd
 }
 function test_low() {
  # Export the released "low" VITS checkpoint (epoch 1600) to ONNX, run
  # test_onnx.py on it, and build
  # /icefall/vits-icefall-en_US-ljspeech-low.tar.bz2 from the model,
  # the tokens and the espeak-ng data.
  local name=icefall-tts-ljspeech-vits-low-2024-03-12
  local pkg=vits-icefall-en_US-ljspeech-low

  git clone https://huggingface.co/csukuangfj/$name
  ./vits/export-onnx.py \
    --model-type low \
    --epoch 1600 \
    --exp-dir ./$name/exp \
    --tokens ./$name/data/tokens.txt
  ls -lh ./$name/exp
  ./vits/test_onnx.py \
    --model-filename ./$name/exp/vits-epoch-1600.onnx \
    --tokens ./$name/data/tokens.txt \
    --output-filename /icefall/test-low.wav
  ls -lh /icefall/test-low.wav

  d=/icefall/$pkg
  mkdir $d
  cp -v ./$name/data/tokens.txt $d/
  cp -v ./$name/exp/vits-epoch-1600.onnx $d/model.onnx
  rm -rf $name

  pushd $d
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
  tar xf espeak-ng-data.tar.bz2
  rm espeak-ng-data.tar.bz2
  # Step up to /icefall so that the archive contains the directory itself.
  cd ..
  tar cjf $pkg.tar.bz2 $pkg
  rm -rf $pkg
  ls -lh *.tar.bz2
  popd
 }
 # Train, run inference and export ONNX for all three model types.
 prepare_data
 train
 infer
 export_onnx
 # Remove the freshly trained experiment directories before testing the
 # released medium and low models.
 rm -rf vits/exp-{low,medium,high}
 test_medium
 test_low
--- a/.github/scripts/multi_zh-hans/ASR/run.sh
+++ b/.github/scripts/multi_zh-hans/ASR/run.sh
@ -0,0 +1,756 @@
 #!/usr/bin/env bash
 # -e: stop at the first failing command; -x: echo every command before it runs.
 set -ex
 # Git identity and LFS setting for the `git commit` / `git push` calls below.
 git config --global user.name "k2-fsa"
 git config --global user.email "csukuangfj@gmail.com"
 git config --global lfs.allowincompletepush true
 # onnxmltools, onnx and onnxruntime are pinned; sherpa-onnx is not.
 python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx
 log() {
  # Timestamped logger taken from espnet. The prefix holds the basename of
  # the calling script, the line of the call and the calling function.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S') || true
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 # The relative paths used below (./zipformer/...) are resolved from here.
 cd egs/multi_zh-hans/ASR
 log "pwd: $PWD"
 function run_2023_9_2() {
  # Clone the 2023-9-2 non-streaming zipformer repo, export the checkpoint
  # to ONNX (with --fp16 1) and decode the six test waves with the fp32,
  # int8 and fp16 model files. The clone is removed at the end.
  #
  # repo_url and repo are intentionally NOT declared local: they stay
  # global exactly as before.
  local exp lang
  local -a wavs

  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
  exp=$repo/exp
  lang=$repo/data/lang_bpe_2000
  wavs=(
    $repo/test_wavs/DEV_T0000000000.wav
    $repo/test_wavs/DEV_T0000000001.wav
    $repo/test_wavs/DEV_T0000000002.wav
    $repo/test_wavs/TEST_MEETING_T0000000113.wav
    $repo/test_wavs/TEST_MEETING_T0000000219.wav
    $repo/test_wavs/TEST_MEETING_T0000000351.wav
  )

  # The clone skipped LFS content; pull only the files that are needed.
  pushd $repo
  cd exp
  git lfs pull --include pretrained.pt
  ln -s pretrained.pt epoch-99.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  log "--------------------------------------------"
  log "Export non-streaming ONNX transducer models "
  log "--------------------------------------------"
  ./zipformer/export-onnx.py \
    --tokens $lang/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $exp \
    --causal False \
    --fp16 1
  ls -lh $exp

  # fp32 models
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename $exp/encoder-epoch-99-avg-1.onnx \
    --decoder-model-filename $exp/decoder-epoch-99-avg-1.onnx \
    --joiner-model-filename $exp/joiner-epoch-99-avg-1.onnx \
    --tokens $lang/tokens.txt \
    "${wavs[@]}"

  # int8 encoder and joiner, combined with the non-int8 decoder
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename $exp/encoder-epoch-99-avg-1.int8.onnx \
    --decoder-model-filename $exp/decoder-epoch-99-avg-1.onnx \
    --joiner-model-filename $exp/joiner-epoch-99-avg-1.int8.onnx \
    --tokens $lang/tokens.txt \
    "${wavs[@]}"

  # fp16 models
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename $exp/encoder-epoch-99-avg-1.fp16.onnx \
    --decoder-model-filename $exp/decoder-epoch-99-avg-1.fp16.onnx \
    --joiner-model-filename $exp/joiner-epoch-99-avg-1.fp16.onnx \
    --tokens $lang/tokens.txt \
    "${wavs[@]}"

  rm -rf $repo
 }
 function run_2023_11_05_streaming() {
  # Clone the 2023-11-05 streaming zipformer repo, then
  #   1. export and test streaming ONNX CTC models (fp32 / int8 / fp16),
  #      upload each variant to huggingface.co/k2-fsa and create a tarball,
  #   2. export and test streaming ONNX transducer models, upload each
  #      variant to huggingface.co/csukuangfj and create a tarball.
  # Tarballs are moved three directories up from the current directory.
  #
  # Requires HF_TOKEN in the environment for the pushes. repo_url, repo and
  # the other working variables are left global, as before; only the helper
  # flag below is local.
  local xtrace_on
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
  pushd $repo
  cd exp/
  git lfs pull --include pretrained.pt
  rm -fv epoch-20.pt
  rm -fv *.onnx
  # The exports below are run with --epoch 20, hence the epoch-20.pt alias.
  ln -s pretrained.pt epoch-20.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd
  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    --fp16 1
  ls -lh $repo/exp/
  log "------------------------------------------------------------"
  log "Test exported streaming ONNX CTC models (greedy search)     "
  log "------------------------------------------------------------"
  test_wavs=(
    DEV_T0000000000.wav
    DEV_T0000000001.wav
    DEV_T0000000002.wav
    TEST_MEETING_T0000000113.wav
    TEST_MEETING_T0000000219.wav
    TEST_MEETING_T0000000351.wav
  )
  for w in "${test_wavs[@]}"; do
    log "----fp32----"
    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
      --tokens $repo/data/lang_bpe_2000/tokens.txt \
      $repo/test_wavs/$w
    log "----int8----"
    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
      --tokens $repo/data/lang_bpe_2000/tokens.txt \
      $repo/test_wavs/$w
    log "----fp16----"
    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
      --tokens $repo/data/lang_bpe_2000/tokens.txt \
      $repo/test_wavs/$w
  done
  log "Upload onnx CTC models to huggingface"
  name=(
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
    )
  for n in "${name[@]}"; do
      url=https://huggingface.co/k2-fsa/$n
      GIT_LFS_SKIP_SMUDGE=1 git clone $url
      dst=$(basename $url)
      # Each target repo receives exactly one precision variant.
      if [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]]; then
        cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 ]]; then
        cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 ]]; then
        cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
      fi
      cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
      cp -v $repo/data/lang_bpe_2000/bpe.model $dst
      mkdir -p $dst/test_wavs
      cp -v $repo/test_wavs/*.wav $dst/test_wavs
      cd $dst
      git lfs track "*.onnx" "bpe.model" "*.wav"
      ls -lh
      file bpe.model
      git status
      git add .
      # The push URL embeds ${HF_TOKEN}. Suspend xtrace around it so that
      # `set -x` does not print the expanded token, then restore the
      # previous tracing state. A failed commit/push is tolerated (|| true).
      case $- in *x*) xtrace_on=1; set +x ;; *) xtrace_on=0 ;; esac
      git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
      if [[ $xtrace_on == 1 ]]; then set -x; fi
      log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
      # Strip the git metadata so that the tarball contains only model files.
      rm -rf .git
      rm -fv .gitattributes
      cd ..
      tar cjfv $dst.tar.bz2 $dst
      ls -lh *.tar.bz2
      mv -v $dst.tar.bz2 ../../../
  done
  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    --fp16 1
  ls -lh $repo/exp
  log "------------------------------------------------------------"
  log "Test exported streaming ONNX transducer models (Python code)"
  log "------------------------------------------------------------"
  log "test fp32"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav
  log "test int8"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav
  log "test fp16"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
    --decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
    --joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    $repo/test_wavs/DEV_T0000000000.wav
  name=(
    sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
  )
  for n in "${name[@]}"; do
      url=https://huggingface.co/csukuangfj/$n
      GIT_LFS_SKIP_SMUDGE=1 git clone $url
      dst=$(basename $url)
      # Each target repo receives exactly one precision variant; the int8
      # variant ships the non-int8 decoder.
      if [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 ]]; then
        cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
        cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
        cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 ]]; then
        cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
        cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
        cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 ]]; then
        cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
        cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
        cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
      fi
      cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
      cp -v $repo/data/lang_bpe_2000/bpe.model $dst
      mkdir -p $dst/test_wavs
      cp -v $repo/test_wavs/*.wav $dst/test_wavs
      cd $dst
      git lfs track "*.onnx" "bpe.model" "*.wav"
      ls -lh
      file bpe.model
      git status
      git add .
      # See the first upload loop: keep ${HF_TOKEN} out of the xtrace output.
      case $- in *x*) xtrace_on=1; set +x ;; *) xtrace_on=0 ;; esac
      git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
      if [[ $xtrace_on == 1 ]]; then set -x; fi
      log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
      # Strip the git metadata so that the tarball contains only model files.
      rm -rf .git
      rm -fv .gitattributes
      cd ..
      tar cjfv $dst.tar.bz2 $dst
      ls -lh *.tar.bz2
      mv -v $dst.tar.bz2 ../../../
  done
 }
 # Publish the streaming ONNX transducer models found in $repo/exp.
 #
 # The target Hugging Face repository is cloned with GIT_LFS_SKIP_SMUDGE=1,
 # filled with the models, tokens.txt, bpe.model and the test waves, committed
 # and pushed. The directory is then stripped of its git metadata, archived as
 # $dst.tar.bz2 and the archive is moved three directories up.
 #
 # Globals:
 #   repo     (read)    - model checkout; it is not assigned in this function,
 #                        so it must already be set when this runs
 #   HF_TOKEN (read)    - token embedded in the push URL
 #   url, dst (written) - target repository URL and its directory name
 function run_2023_12_12_streaming() {
  log "Upload onnx transducer models to huggingface"
  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
  GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
  dst=$(basename "$url")
  # Globs stay outside the quotes so that they still expand.
  cp -v "$repo"/exp/encoder*.onnx "$dst"
  cp -v "$repo"/exp/decoder*.onnx "$dst"
  cp -v "$repo"/exp/joiner*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
  mkdir -p "$dst/test_wavs"
  cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"
  # Stop here if the clone directory is missing: the git commands and the
  # "rm -rf .git" below must never run in any other directory.
  cd "$dst" || return 1
  git lfs track "*.onnx" bpe.model "*.wav"
  git add .
  # Best effort: a failed commit (e.g. nothing changed) or push is ignored.
  git commit -m "upload model" && git push "https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst" main || true
  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
  # Drop the git metadata so that the archive holds the model files only.
  rm -rf .git
  rm -fv .gitattributes
  cd ..
  tar cjfv "$dst.tar.bz2" "$dst"
  ls -lh *.tar.bz2
  mv -v "$dst.tar.bz2" ../../../
 }
 # Export, smoke-test and publish the streaming models built from
 # yuekai/icefall-asr-multi-zh-hans-zipformer-large.
 #
 # Two passes are made over the same checkpoint: first the CTC export, then
 # the transducer export. Each pass renames the fp32, int8 and fp16 files,
 # decodes three test waves with sherpa-onnx, pushes one Hugging Face
 # repository per precision and moves a <name>.tar.bz2 three directories up.
 #
 # Globals:
 #   HF_TOKEN (read) - token embedded in the clone and push URLs
 #   repo_url, repo, name, url, dst (written)
 function run_yuekai_large() {
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
  # Log the credential-free URL: $repo_url embeds HF_TOKEN and must not be printed.
  log "Downloading pre-trained model from https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
  pushd $repo
  git lfs pull --include pretrained.pt
  # Renamed to go with the "--epoch 99 --avg 1" arguments used for the exports
  # below (presumably how the export scripts locate the checkpoint).
  mv pretrained.pt epoch-99.pt
  curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
  popd
  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir $repo/ \
    --tokens $repo/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    \
    --num-encoder-layers 2,2,4,5,4,2 \
    --feedforward-dim 768,1024,1536,2048,1536,768 \
    --encoder-dim 256,384,512,768,512,256 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    \
    --fp16 1 \
    --use-whisper-features 1
  ls -lh $repo/
  pushd $repo
 cat >README.md <<EOF
 # Introduction
 This model is converted
 from
 https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
 The training code can be found at
 https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-large-model
 EOF
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.onnx model.onnx
  ls -lh *.onnx
  mkdir -p test_wavs
  cd test_wavs
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
  # popd undoes the "pushd $repo" above, so this leaves both $repo/test_wavs
  # and $repo in one step.
  popd
  # Decode every test wave with each precision of the CTC model.
  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
    sherpa-onnx \
      --zipformer2-ctc-model=$repo/model.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w
    log "---int8---"
    sherpa-onnx \
      --zipformer2-ctc-model=$repo/model.int8.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w
    log "---fp16---"
    sherpa-onnx \
      --zipformer2-ctc-model=$repo/model.fp16.onnx \
      --tokens=$repo/tokens.txt \
      $repo/test_wavs/$w
  done
  # One target repository per precision of the CTC model.
  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
      url=https://huggingface.co/csukuangfj/$n
      GIT_LFS_SKIP_SMUDGE=1 git clone $url
      dst=$(basename $url)
      if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30 ]]; then
        cp -v $repo/model.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30 ]]; then
        cp -v $repo/model.int8.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30 ]]; then
        cp -v $repo/model.fp16.onnx $dst
      fi
      cp -v $repo/tokens.txt $dst
      cp -v $repo/README.md $dst
      mkdir -p $dst/test_wavs
      cp -v $repo/test_wavs/*.wav $dst/test_wavs
      cd $dst
      git lfs track "*.onnx" "*.wav"
      ls -lh
      git status
      git add .
      # Best effort: a failed commit (e.g. nothing changed) or push is ignored.
      git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
      log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
      # Drop the git metadata so that the archive holds the model files only.
      rm -rf .git
      rm -fv .gitattributes
      cd ..
      tar cjfv $dst.tar.bz2 $dst
      ls -lh *.tar.bz2
      mv -v $dst.tar.bz2 ../../../
  done
  # Remove the CTC models before the transducer export writes into $repo.
  rm $repo/*.onnx
  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming.py \
    --exp-dir $repo \
    --tokens $repo/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    \
    --num-encoder-layers 2,2,4,5,4,2 \
    --feedforward-dim 768,1024,1536,2048,1536,768 \
    --encoder-dim 256,384,512,768,512,256 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    \
    --fp16 1 \
    --use-whisper-features 1
  ls -lh $repo
  pushd $repo
  # Shorten the exported file names to <part>[.fp16|.int8].onnx.
  for m in encoder decoder joiner; do
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.onnx $m.onnx
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.fp16.onnx $m.fp16.onnx
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.int8.onnx $m.int8.onnx
  done
  ls -lh *.onnx
  popd
  # Decode every test wave with each precision of the transducer model.
  # The int8 run pairs the int8 encoder and joiner with the fp32 decoder.
  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
      sherpa-onnx \
        --encoder=$repo/encoder.onnx \
        --decoder=$repo/decoder.onnx \
        --joiner=$repo/joiner.onnx \
        --tokens=$repo/tokens.txt \
        $repo/test_wavs/$w
    log "---int8---"
      sherpa-onnx \
        --encoder=$repo/encoder.int8.onnx \
        --decoder=$repo/decoder.onnx \
        --joiner=$repo/joiner.int8.onnx \
        --tokens=$repo/tokens.txt \
        $repo/test_wavs/$w
    log "---fp16---"
      sherpa-onnx \
        --encoder=$repo/encoder.fp16.onnx \
        --decoder=$repo/decoder.fp16.onnx \
        --joiner=$repo/joiner.fp16.onnx \
        --tokens=$repo/tokens.txt \
        $repo/test_wavs/$w
  done
  # One target repository per precision of the transducer model.
  name=(
    sherpa-onnx-streaming-zipformer-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
      url=https://huggingface.co/csukuangfj/$n
      GIT_LFS_SKIP_SMUDGE=1 git clone $url
      dst=$(basename $url)
      if [[ $n == sherpa-onnx-streaming-zipformer-zh-2025-06-30 ]]; then
        cp -v $repo/encoder.onnx $dst
        cp -v $repo/decoder.onnx $dst
        cp -v $repo/joiner.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30 ]]; then
        cp -v $repo/encoder.int8.onnx $dst
        cp -v $repo/decoder.onnx $dst
        cp -v $repo/joiner.int8.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30 ]]; then
        cp -v $repo/encoder.fp16.onnx $dst
        cp -v $repo/decoder.fp16.onnx $dst
        cp -v $repo/joiner.fp16.onnx $dst
      fi
      cp -v $repo/tokens.txt $dst
      cp -v $repo/README.md $dst
      mkdir -p $dst/test_wavs
      cp -v $repo/test_wavs/*.wav $dst/test_wavs
      cd $dst
      git lfs track "*.onnx" "*.wav"
      ls -lh
      git status
      git add .
      # Best effort: a failed commit (e.g. nothing changed) or push is ignored.
      git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
      log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
      rm -rf .git
      rm -fv .gitattributes
      cd ..
      tar cjfv $dst.tar.bz2 $dst
      ls -lh *.tar.bz2
      mv -v $dst.tar.bz2 ../../../
  done
 }
 # Export, smoke-test and publish the streaming models built from
 # yuekai/icefall-asr-multi-zh-hans-zipformer-xl.
 #
 # The CTC export is decoded with sherpa-onnx and pushed as an int8 and an
 # fp16 repository; the transducer export is then pushed the same way. Every
 # published directory is also archived as <name>.tar.bz2 and the archive is
 # moved three directories up.
 #
 # Globals:
 #   HF_TOKEN (read) - token embedded in the clone and push URLs
 #   repo_url, repo, name, url, dst (written)
 function run_yuekai_xl() {
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
  # Log the credential-free URL: $repo_url embeds HF_TOKEN and must not be printed.
  log "Downloading pre-trained model from https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)
  pushd $repo
  git lfs pull --include pretrained.pt
  git lfs pull --include data/lang_bpe_2000/bpe.model
  # Renamed to go with the "--epoch 99 --avg 1" arguments used for the exports
  # below (presumably how the export scripts locate the checkpoint).
  mv pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd
  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir $repo/ \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    \
    --num-encoder-layers 2,3,5,6,5,3 \
    --feedforward-dim 1536,2048,3072,4096,3072,1536 \
    --encoder-dim 512,768,1024,1536,1024,512 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --decoder-dim 768 --joiner-dim 768 \
    --value-head-dim 18 \
    --query-head-dim 48 \
    --num-heads 4,4,4,8,4,4 \
    \
    --fp16 1 \
    --use-whisper-features 1 \
    --use-external-data 1
  # NOTE(review): these files are renamed in the current directory, not in
  # $repo - presumably --use-external-data changes where they are written;
  # confirm against the export script.
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
  ls -lh *.onnx
  mkdir -p test_wavs
  pushd test_wavs
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
  popd
  # Decode every test wave with both precisions of the CTC model.
  for w in 0.wav 1.wav 8k.wav; do
    log "---int8---"
    sherpa-onnx \
      --zipformer2-ctc-model=./model.int8.onnx \
      --tokens=$repo/data/lang_bpe_2000/tokens.txt \
      test_wavs/$w
    log "---fp16---"
    sherpa-onnx \
      --zipformer2-ctc-model=./model.fp16.onnx \
      --tokens=$repo/data/lang_bpe_2000/tokens.txt \
      test_wavs/$w
  done
  pushd $repo
 cat >README.md <<EOF
 # Introduction
 This model is converted
 from
 https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
 The training code can be found at
 https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-xl-model
 EOF
  popd
  # One target repository per precision of the CTC model.
  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
      url=https://huggingface.co/csukuangfj/$n
      GIT_LFS_SKIP_SMUDGE=1 git clone $url
      dst=$(basename $url)
      if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30 ]]; then
        cp -v model.fp16.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30 ]]; then
        cp -v model.int8.onnx $dst
      fi
      cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
      cp -v $repo/data/lang_bpe_2000/bpe.model $dst
      cp -v $repo/README.md $dst
      mkdir -p $dst/test_wavs
      cp -v ./test_wavs/*.wav $dst/test_wavs
      cd $dst
      git lfs track "*.onnx" "*.wav" "bpe.model"
      ls -lh
      git status
      git add .
      # Best effort: a failed commit (e.g. nothing changed) or push is ignored.
      git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
      log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
      # Drop the git metadata so that the archive holds the model files only.
      rm -rf .git
      rm -fv .gitattributes
      cd ..
      ls -lh $dst
      tar cjfv $dst.tar.bz2 $dst
      ls -lh *.tar.bz2
      mv -v $dst.tar.bz2 ../../../
  done
  # Clear the CTC artifacts from the current directory before the next export.
  rm -fv *.onnx *.weights
  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming.py \
    --exp-dir $repo/ \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    \
    --num-encoder-layers 2,3,5,6,5,3 \
    --feedforward-dim 1536,2048,3072,4096,3072,1536 \
    --encoder-dim 512,768,1024,1536,1024,512 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --decoder-dim 768 --joiner-dim 768 \
    --value-head-dim 18 \
    --query-head-dim 48 \
    --num-heads 4,4,4,8,4,4 \
    \
    --fp16 1 \
    --use-whisper-features 1 \
    --use-external-data 1
  ls -lh *.onnx
  ls -lh *.weights
  # NOTE(review): the encoder files are taken from the current directory while
  # the decoder and joiner files come from $repo - confirm that this matches
  # where the export script writes each of them.
  mv encoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx encoder.fp16.onnx
  mv encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx encoder.int8.onnx
  mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.onnx decoder.onnx
  mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx decoder.fp16.onnx
  mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx joiner.int8.onnx
  mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.fp16.onnx joiner.fp16.onnx
  # One target repository per precision of the transducer model.
  name=(
    sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
      url=https://huggingface.co/csukuangfj/$n
      GIT_LFS_SKIP_SMUDGE=1 git clone $url
      dst=$(basename $url)
      if [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30 ]]; then
        cp -v encoder.fp16.onnx $dst
        cp -v decoder.fp16.onnx $dst
        cp -v joiner.fp16.onnx $dst
      elif [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30 ]]; then
        # The int8 package ships the fp32 decoder next to the int8 encoder/joiner.
        cp -v encoder.int8.onnx $dst
        cp -v decoder.onnx $dst
        cp -v joiner.int8.onnx $dst
      fi
      cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
      cp -v $repo/data/lang_bpe_2000/bpe.model $dst
      cp -v $repo/README.md $dst
      mkdir -p $dst/test_wavs
      cp -v ./test_wavs/*.wav $dst/test_wavs
      cd $dst
      git lfs track "*.onnx" "*.wav" "bpe.model"
      ls -lh
      git status
      git add .
      # Best effort: a failed commit (e.g. nothing changed) or push is ignored.
      git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
      log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
      rm -rf .git
      rm -fv .gitattributes
      cd ..
      ls -lh $dst
      tar cjfv $dst.tar.bz2 $dst
      ls -lh *.tar.bz2
      mv -v $dst.tar.bz2 ../../../
  done
  rm -fv *.onnx *.weights
 }
 # Entry point: only run_2023_11_05_streaming is enabled. The other jobs are
 # kept as commented-out calls; uncomment a line to run that job.
 # run_yuekai_large
 # run_yuekai_xl
 # run_2023_9_2
 run_2023_11_05_streaming
 # run_2023_12_12_streaming
--- a/.github/scripts/multi_zh-hans/ASR/run_rknn.sh
+++ b/.github/scripts/multi_zh-hans/ASR/run_rknn.sh
@ -0,0 +1,73 @@
 #!/usr/bin/env bash
 set -ex
 python3 -m pip install kaldi-native-fbank soundfile librosa
 log() {
  # Borrowed from espnet: print a timestamped message tagged with the file
  # name, line number and function name of the caller.
  local caller_file=${BASH_SOURCE[1]##*/}
  local where="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
 }
 cd egs/multi_zh-hans/ASR
 # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese
 # Download the 2023-11-05 streaming checkpoint into ./exp, export it to ONNX,
 # then convert the ONNX transducer to RKNN for each listed Rockchip platform
 # and move one <dst>.tar.bz2 per platform to /icefall/.
 function export_2023_11_05() {
  local hf_base=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main
  local onnx_tag=epoch-99-avg-1-chunk-32-left-128
  local idx
  d=exp
  mkdir $d
  pushd $d
  curl -SL -O $hf_base/data/lang_bpe_2000/tokens.txt
  curl -SL -O $hf_base/exp/pretrained.pt
  mv pretrained.pt epoch-99.pt
  # Store the three test waves under short names: 0.wav, 1.wav and 2.wav.
  for idx in 0 1 2; do
    curl -SL -o $idx.wav $hf_base/test_wavs/DEV_T000000000$idx.wav
  done
  ls -lh
  popd
  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens $d/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $d \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1
  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-multi-zh-hans-2023-12-12
    mkdir -p $dst
    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder $d/encoder-$onnx_tag.onnx \
      --in-decoder $d/decoder-$onnx_tag.onnx \
      --in-joiner $d/joiner-$onnx_tag.onnx \
      --out-encoder $dst/encoder.rknn \
      --out-decoder $dst/decoder.rknn \
      --out-joiner $dst/joiner.rknn \
      --target-platform $platform
    cp $d/tokens.txt $dst
    mkdir $dst/test_wavs
    cp $d/*.wav $dst/test_wavs
    tar cjvf $dst.tar.bz2 $dst
    ls -lh $dst.tar.bz2
    mv $dst.tar.bz2 /icefall/
    ls -lh $dst/
    echo "---"
    # The package directory is no longer needed once it has been archived.
    rm -rf $dst
  done
 }
 export_2023_11_05
--- a/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
+++ b/.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
@ -0,0 +1,13 @@
 #!/usr/bin/env bash
 # This script assumes that test-clean and test-other are downloaded
 # to egs/librispeech/ASR/download/LibriSpeech and generates manifest
 # files in egs/librispeech/ASR/data/manifests
 set -e
 cd egs/librispeech/ASR
 # Link the download directory from ~/tmp/download unless something named
 # "download" is already present here.
 if [ ! -e download ]; then
  ln -s ~/tmp/download .
 fi
 manifest_dir=data/manifests
 mkdir -p $manifest_dir
 lhotse prepare librispeech -j 2 -p test-clean -p test-other ./download/LibriSpeech $manifest_dir
 ls -lh $manifest_dir
--- a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
+++ b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
@ -0,0 +1,62 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Borrowed from espnet: print a timestamped message tagged with the file
  # name, line number and function name of the caller.
  local caller_file=${BASH_SOURCE[1]##*/}
  local where="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
 }
 cd egs/gigaspeech/ASR
 # Fetch the published checkpoint; "repo" is the name of the checkout directory.
 repo_url=https://huggingface.co/wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 repo=${repo_url##*/}
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Decoding runs for scheduled or manually dispatched workflows, or when the
 # event label is "run-decode".
 run_decode=no
 case "${GITHUB_EVENT_NAME}" in
  schedule | workflow_dispatch) run_decode=yes ;;
 esac
 if [[ "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  run_decode=yes
 fi
 if [[ $run_decode == yes ]]; then
  exp_dir=pruned_transducer_stateless2/exp
  mkdir -p $exp_dir
  ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt $exp_dir/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  for listed in data data/lang_bpe_500 data/fbank $exp_dir; do
    ls -lh $listed
  done
  pushd data/fbank
  fbank_url=https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank
  for fbank_file in cuts_DEV.jsonl.gz cuts_TEST.jsonl.gz feats_DEV.lca feats_TEST.lca; do
    curl -SL -O $fbank_url/$fbank_file
  done
  # Also expose the cut manifests under the gigaspeech_ prefix.
  for part in DEV TEST; do
    ln -sf cuts_${part}.jsonl.gz gigaspeech_cuts_${part}.jsonl.gz
  done
  popd
  log "Decoding dev and test"
  # A small max duration keeps CPU decoding manageable.
  max_duration=100
  # Only greedy_search is run, to reduce CI running time; the other methods
  # are fast_beam_search and modified_beam_search.
  for method in greedy_search; do
    log "Decoding with $method"
    ./pruned_transducer_stateless2/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --max-duration $max_duration \
      --exp-dir $exp_dir
  done
  rm $exp_dir/*.pt
 fi
--- a/.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
+++ b/.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
@ -0,0 +1,172 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Borrowed from espnet: print a timestamped message tagged with the file
  # name, line number and function name of the caller.
  local caller_file=${BASH_SOURCE[1]##*/}
  local where="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
 }
 cd egs/gigaspeech/ASR
 repo_url=https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone without the LFS payloads; the needed files are pulled one by one below.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=${repo_url##*/}
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 for lfs_path in \
  data/lang_bpe_500/bpe.model \
  data/lang_bpe_500/tokens.txt \
  exp/jit_script.pt \
  exp/pretrained.pt; do
  git lfs pull --include "$lfs_path"
 done
 # Make epoch-30.pt an alias of the pulled checkpoint and drop the ONNX files
 # that came with the clone.
 rm epoch-30.pt
 ln -s pretrained.pt epoch-30.pt
 rm *.onnx
 ls -lh
 popd
 log "----------------------------------------"
 log "Export ONNX transducer models "
 log "----------------------------------------"
 ./zipformer/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 30 \
  --avg 1 \
  --exp-dir $repo/exp
 ls -lh $repo/exp
 log "------------------------------------------------------------"
 log "Test exported ONNX transducer models (Python code)          "
 log "------------------------------------------------------------"
 # fp32 uses the plain files; int8 swaps in the quantized encoder and joiner
 # and keeps the fp32 decoder.
 for precision in fp32 int8; do
  log "test $precision"
  if [[ $precision == int8 ]]; then
    quant=.int8
  else
    quant=
  fi
  ./zipformer/onnx_pretrained.py \
   --encoder-model-filename $repo/exp/encoder-epoch-30-avg-1${quant}.onnx \
   --decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
   --joiner-model-filename $repo/exp/joiner-epoch-30-avg-1${quant}.onnx \
   --tokens $repo/data/lang_bpe_500/tokens.txt \
   $repo/test_wavs/1089-134686-0001.wav \
   $repo/test_wavs/1221-135766-0001.wav \
   $repo/test_wavs/1221-135766-0002.wav
 done
 log "Upload models to huggingface"
 git config --global user.name "k2-fsa"
 git config --global user.email "xxx@gmail.com"
 url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
 GIT_LFS_SKIP_SMUDGE=1 git clone $url
 dst=${url##*/}
 cp -v $repo/exp/*.onnx $dst
 for lang_file in tokens.txt bpe.model; do
  cp -v $repo/data/lang_bpe_500/$lang_file $dst
 done
 mkdir -p $dst/test_wavs
 cp -v $repo/test_wavs/*.wav $dst/test_wavs
 cd $dst
 git lfs track "*.onnx"
 git add .
 # Best effort: push only after a successful commit, and ignore a failed push.
 if git commit -m "upload model"; then
  git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
 fi
 log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
 # Drop the git metadata so that the archive holds the model files only.
 rm -rf .git
 rm -fv .gitattributes
 cd ..
 archive=$dst.tar.bz2
 tar cjfv $archive $dst
 ls -lh
 mv -v $archive ../../../
 # Arguments shared by the decoding commands below: the token table followed
 # by the three test waves.
 tokens_file=$repo/data/lang_bpe_500/tokens.txt
 wav_files=(
  $repo/test_wavs/1089-134686-0001.wav
  $repo/test_wavs/1221-135766-0001.wav
  $repo/test_wavs/1221-135766-0002.wav
 )
 log "Export to torchscript model"
 ./zipformer/export.py \
  --exp-dir $repo/exp \
  --use-averaged-model false \
  --tokens $tokens_file \
  --epoch 30 \
  --avg 1 \
  --jit 1
 ls -lh $repo/exp/*.pt
 log "Decode with models exported by torch.jit.script()"
 ./zipformer/jit_pretrained.py \
  --tokens $tokens_file \
  --nn-model-filename $repo/exp/jit_script.pt \
  "${wav_files[@]}"
 # Decode the same waves from the raw checkpoint with each search method.
 for method in greedy_search modified_beam_search fast_beam_search; do
  log "$method"
  ./zipformer/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --tokens $tokens_file \
    "${wav_files[@]}"
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Decoding runs for scheduled or manually dispatched workflows, or when the
 # event label is "run-decode".
 if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode"  ]]; then
  # Expose the checkpoint and the lang directory at the paths passed to decode.py.
  mkdir -p zipformer/exp
  ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
  mkdir -p data
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh zipformer/exp
  mkdir -p data/fbank
  pushd data/fbank
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
  # Also expose the cut manifests under the gigaspeech_ prefix.
  ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
  ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
  popd
  # The data fetched above is the GigaSpeech DEV and TEST sets, so the message
  # names those sets rather than LibriSpeech's test-clean/test-other.
  log "Decoding dev and test"
  # use a small value for decoding with CPU
  max_duration=100
  for method in greedy_search; do
    log "Decoding with $method"
    ./zipformer/decode.py \
      --decoding-method $method \
      --epoch 30 \
      --avg 1 \
      --use-averaged-model 0 \
      --max-duration $max_duration \
      --exp-dir zipformer/exp
  done
  rm zipformer/exp/*.pt
 fi
--- a/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
+++ b/.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
@ -0,0 +1,191 @@
 #!/usr/bin/env bash
 #
 set -e
 log() {
  # Borrowed from espnet: print a timestamped message tagged with the file
  # name, line number and function name of the caller.
  local caller_file=${BASH_SOURCE[1]##*/}
  local where="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
 }
 cd egs/librispeech/ASR
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 # "repo" is the checkout directory name, "abs_repo" its absolute path.
 repo=${repo_url##*/}
 abs_repo=$(realpath $repo)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 pushd $repo/exp
 # Give the released checkpoint the two names used by the commands that follow.
 for alias_name in pretrained.pt epoch-99.pt; do
  ln -s pretrained-iter-468000-avg-16.pt $alias_name
 done
 popd
 log "Test exporting with torch.jit.trace()"
 jit_exp_dir=$repo/exp
 ./lstm_transducer_stateless2/export.py \
  --exp-dir $jit_exp_dir \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --jit-trace 1
 log "Decode with models exported by torch.jit.trace()"
 # Decode the three test waves with the traced encoder, decoder and joiner.
 jit_wavs=(
  $repo/test_wavs/1089-134686-0001.wav
  $repo/test_wavs/1221-135766-0001.wav
  $repo/test_wavs/1221-135766-0002.wav
 )
 ./lstm_transducer_stateless2/jit_pretrained.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --encoder-model-filename $jit_exp_dir/encoder_jit_trace.pt \
  --decoder-model-filename $jit_exp_dir/decoder_jit_trace.pt \
  --joiner-model-filename $jit_exp_dir/joiner_jit_trace.pt \
  "${jit_wavs[@]}"
 # Trailing arguments shared by every pretrained.py call below: checkpoint,
 # token table and the three test waves, in that order.
 pretrained_tail=(
  --checkpoint $repo/exp/pretrained.pt
  --tokens $repo/data/lang_bpe_500/tokens.txt
  $repo/test_wavs/1089-134686-0001.wav
  $repo/test_wavs/1221-135766-0001.wav
  $repo/test_wavs/1221-135766-0002.wav
 )
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./lstm_transducer_stateless2/pretrained.py \
    --method greedy_search \
    --max-sym-per-frame $sym \
    "${pretrained_tail[@]}"
 done
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./lstm_transducer_stateless2/pretrained.py \
    --method $method \
    --beam-size 4 \
    "${pretrained_tail[@]}"
 done
 echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
 echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
 # Shallow-fusion decoding with an RNN LM runs only for the "shallow-fusion" label.
 if [[ "${GITHUB_EVENT_LABEL_NAME}" == "shallow-fusion" ]]; then
  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
  lm_repo=${lm_repo_url##*/}
  pushd $lm_repo
  git lfs pull --include "exp/pretrained.pt"
  # Renamed to go with "--lm-epoch 88" below.
  mv exp/pretrained.pt exp/epoch-88.pt
  popd
  am_exp_dir=lstm_transducer_stateless2/exp
  mkdir -p $am_exp_dir
  ln -sf $PWD/$repo/exp/pretrained.pt $am_exp_dir/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh $am_exp_dir
  log "Decoding test-clean and test-other with RNN LM"
  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 999 \
    --avg 1 \
    --exp-dir $am_exp_dir \
    --max-duration 600 \
    --decoding-method modified_beam_search_lm_shallow_fusion \
    --beam 4 \
    --use-shallow-fusion 1 \
    --lm-type rnn \
    --lm-exp-dir $lm_repo/exp \
    --lm-epoch 88 \
    --lm-avg 1 \
    --lm-scale 0.3 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1
 fi
 # LODR decoding (RNN LM plus a bi-gram LM) runs only for the "LODR" label.
 if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
  bigram_repo_url=https://huggingface.co/marcoyang/librispeech_bigram
  log "Download bi-gram LM from ${bigram_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
  bigramlm_repo=$(basename $bigram_repo_url)
  pushd $bigramlm_repo
  git lfs pull --include "2gram.fst.txt"
  # $abs_repo is absolute, so the copy works from inside the bi-gram checkout.
  cp 2gram.fst.txt $abs_repo/data/lang_bpe_500/.
  popd
  lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
  log "Download pre-trained RNN-LM model from ${lm_repo_url}"
  GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
  lm_repo=$(basename $lm_repo_url)
  pushd $lm_repo
  git lfs pull --include "exp/pretrained.pt"
  # Renamed to go with "--lm-epoch 88" below.
  mv exp/pretrained.pt exp/epoch-88.pt
  popd
  mkdir -p lstm_transducer_stateless2/exp
  ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/
  ls -lh data
  ls -lh lstm_transducer_stateless2/exp
  log "Decoding test-clean and test-other"
  # NOTE(review): this call used to pass "--rnn-lm-avg 1". It now passes
  # "--lm-avg 1", the spelling that goes with --lm-exp-dir/--lm-epoch and that
  # the shallow-fusion branch of this script gives to the same decode.py.
  # Confirm against the options decode.py actually defines.
  ./lstm_transducer_stateless2/decode.py \
    --use-averaged-model 0 \
    --epoch 999 \
    --avg 1 \
    --exp-dir lstm_transducer_stateless2/exp \
    --max-duration 600 \
    --decoding-method modified_beam_search_LODR \
    --beam 4 \
    --use-shallow-fusion 1 \
    --lm-type rnn \
    --lm-exp-dir $lm_repo/exp \
    --lm-scale 0.4 \
    --lm-epoch 88 \
    --lm-avg 1 \
    --rnn-lm-num-layers 3 \
    --rnn-lm-tie-weights 1 \
    --tokens-ngram 2 \
    --ngram-lm-scale -0.16
 fi
 # Full decoding runs only for scheduled or manually dispatched workflows.
 case "${GITHUB_EVENT_NAME}" in
  schedule | workflow_dispatch)
    sched_exp_dir=lstm_transducer_stateless2/exp
    mkdir -p $sched_exp_dir
    ln -s $PWD/$repo/exp/pretrained.pt $sched_exp_dir/epoch-999.pt
    ln -s $PWD/$repo/data/lang_bpe_500 data/
    ls -lh data
    ls -lh $sched_exp_dir
    log "Decoding test-clean and test-other"
    # A small max duration keeps CPU decoding manageable.
    max_duration=100
    for method in greedy_search fast_beam_search; do
      log "Decoding with $method"
      ./lstm_transducer_stateless2/decode.py \
        --decoding-method $method \
        --epoch 999 \
        --avg 1 \
        --use-averaged-model 0 \
        --max-duration $max_duration \
        --exp-dir $sched_exp_dir
    done
    rm $sched_exp_dir/*.pt
    ;;
 esac
--- a/.github/scripts/run-multi-corpora-zipformer.sh
+++ b/.github/scripts/run-multi-corpora-zipformer.sh
@ -0,0 +1,135 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  # -e keeps backslash escapes in the message interpreted, as callers expect.
  echo -e "${stamp} (${origin}) $*"
 }
 # Smoke test for the pre-trained multi-zh-hans zipformer: clone the model
 # repo, run zipformer/pretrained.py on three test wavs with greedy search and
 # then with the two beam-search methods, and finally remove the clone.
 cd egs/multi_zh-hans/ASR
 log "==== Test icefall-asr-multi-zh-hans-zipformer-2023-9-2 ===="
 repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 # basename ignores the trailing slash, giving the name of the clone directory.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 # Make the epoch-20 checkpoint available as epoch-99.pt, the name passed to
 # --checkpoint below.
 pushd $repo/exp
 ln -s epoch-20.pt epoch-99.pt
 popd
 ls -lh $repo/exp/*.pt
 ./zipformer/pretrained.py \
  --checkpoint $repo/exp/epoch-99.pt \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  --method greedy_search \
 $repo/test_wavs/DEV_T0000000000.wav \
 $repo/test_wavs/DEV_T0000000001.wav \
 $repo/test_wavs/DEV_T0000000002.wav
 # Same three wavs, once per beam-search method.
 for method in modified_beam_search fast_beam_search; do
  log "$method"
  ./zipformer/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/epoch-99.pt \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav
 done
 # Delete the clone before the next model is downloaded.
 rm -rf $repo
 # Smoke test for the pre-trained multi-zh-hans zipformer-ctc model: same
 # steps as a plain pretrained.py run, but every invocation passes --use-ctc 1.
 log "==== Test icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24 ===="
 repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24/
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 # basename ignores the trailing slash, giving the name of the clone directory.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 # Make the epoch-20 checkpoint available as epoch-99.pt, the name passed to
 # --checkpoint below.
 pushd $repo/exp
 ln -s epoch-20.pt epoch-99.pt
 popd
 ls -lh $repo/exp/*.pt
 ./zipformer/pretrained.py \
  --checkpoint $repo/exp/epoch-99.pt \
  --tokens $repo/data/lang_bpe_2000/tokens.txt \
  --use-ctc 1 \
  --method greedy_search \
 $repo/test_wavs/DEV_T0000000000.wav \
 $repo/test_wavs/DEV_T0000000001.wav \
 $repo/test_wavs/DEV_T0000000002.wav
 # Same three wavs, once per beam-search method.
 for method in modified_beam_search fast_beam_search; do
  log "$method"
  ./zipformer/pretrained.py \
    --method $method \
    --beam-size 4 \
    --use-ctc 1 \
    --checkpoint $repo/exp/epoch-99.pt \
    --tokens $repo/data/lang_bpe_2000/tokens.txt \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav
 done
 # Delete the clone before the next model is downloaded.
 rm -rf $repo
 # Smoke test for the pre-trained multi-zh-en zipformer. Switches to the
 # multi_zh_en recipe directory; this model is loaded from exp/pretrained.pt
 # and uses a byte-level BPE model (--bpe-model) instead of a tokens file.
 cd ../../../egs/multi_zh_en/ASR
 log "==== Test icefall-asr-zipformer-multi-zh-en-2023-11-22 ===="
 repo_url=https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 # basename ignores the trailing slash, giving the name of the clone directory.
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 ./zipformer/pretrained.py \
  --checkpoint $repo/exp/pretrained.pt \
  --bpe-model $repo/data/lang_bbpe_2000/bbpe.model \
  --method greedy_search \
 $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav \
 $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav \
 $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
 # Same three wavs, once per beam-search method.
 for method in modified_beam_search fast_beam_search; do
  log "$method"
  ./zipformer/pretrained.py \
    --method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/pretrained.pt \
    --bpe-model $repo/data/lang_bbpe_2000/bbpe.model \
  $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav \
  $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav \
  $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
 done
 rm -rf $repo
--- a/.github/scripts/run-swbd-conformer-ctc-2023-08-26.sh
+++ b/.github/scripts/run-swbd-conformer-ctc-2023-08-26.sh
@ -0,0 +1,44 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  # -e keeps backslash escapes in the message interpreted, as callers expect.
  echo -e "${stamp} (${origin}) $*"
 }
 # Smoke test for the pre-trained SWBD conformer-ctc model: clone the model
 # repo and run conformer_ctc/pretrained.py on three test wavs, once with
 # ctc-decoding and once with 1best.
 cd egs/swbd/ASR
 repo_url=https://huggingface.co/zrjin/icefall-asr-swbd-conformer-ctc-2023-8-26
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 # Make the epoch-98 checkpoint available as epoch-99.pt, the name passed to
 # --checkpoint below.
 pushd $repo/exp
 ln -s epoch-98.pt epoch-99.pt
 popd
 ls -lh $repo/exp/*.pt
 # The same set of lang/LM files is passed for both methods.
 for method in ctc-decoding 1best; do
  log "$method"
  ./conformer_ctc/pretrained.py \
    --method $method \
    --checkpoint $repo/exp/epoch-99.pt \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --words-file $repo/data/lang_bpe_500/words.txt \
    --HLG  $repo/data/lang_bpe_500/HLG.pt \
    --G $repo/data/lm/G_4_gram.pt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 done
--- a/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh
+++ b/.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh
@ -0,0 +1,119 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  # -e keeps backslash escapes in the message interpreted, as callers expect.
  echo -e "${stamp} (${origin}) $*"
 }
 # Download the pre-trained WenetSpeech pruned_transducer_stateless2 model and
 # export it three ways: ONNX, torchscript via --jit, and torchscript via
 # --jit-trace.
 cd egs/wenetspeech/ASR
 repo_url=https://huggingface.co/luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 git clone $repo_url
 repo=$(basename $repo_url)
 log "Display test files"
 tree $repo/
 ls -lh $repo/test_wavs/*.wav
 # Alias the shipped checkpoint under the two names used later:
 # pretrained.pt and epoch-99.pt (selected by --epoch 99 --avg 1).
 pushd $repo/exp
 ln -s pretrained_epoch_10_avg_2.pt pretrained.pt
 ln -s pretrained_epoch_10_avg_2.pt epoch-99.pt
 popd
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless2/export-onnx.py \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_char/tokens.txt \
  --epoch 99 \
  --avg 1
 log "Export to torchscript model"
 ./pruned_transducer_stateless2/export.py \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_char/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --jit 1
 ./pruned_transducer_stateless2/export.py \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_char/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --jit-trace 1
 # List the exp directory's .onnx and .pt files so the log shows what exists
 # after the exports.
 ls -lh $repo/exp/*.onnx
 ls -lh $repo/exp/*.pt
 # Check and decode with the exported models: onnx_check.py, then
 # onnx_pretrained.py, then jit_pretrained.py with the *_jit_trace.pt and
 # *_jit_script.pt model files.
 log "Decode with ONNX models"
 # NOTE(review): these ONNX file names use epoch-10-avg-2, while the export
 # earlier in this script runs with --epoch 99 --avg 1 and the next command
 # reads epoch-99-avg-1 files. Presumably the epoch-10-avg-2 files ship with
 # the cloned repo; confirm.
 ./pruned_transducer_stateless2/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-10-avg-2.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-10-avg-2.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-10-avg-2.onnx \
  --onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj-epoch-10-avg-2.onnx \
  --onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj-epoch-10-avg-2.onnx
 ./pruned_transducer_stateless2/onnx_pretrained.py \
  --tokens $repo/data/lang_char/tokens.txt \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav
 log "Decode with models exported by torch.jit.trace()"
 ./pruned_transducer_stateless2/jit_pretrained.py \
  --tokens $repo/data/lang_char/tokens.txt \
  --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
  --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
  --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav
 # NOTE(review): same decode, using the *_jit_script.pt files; which step
 # creates these files is not visible in this script; confirm.
 ./pruned_transducer_stateless2/jit_pretrained.py \
  --tokens $repo/data/lang_char/tokens.txt \
  --encoder-model-filename $repo/exp/encoder_jit_script.pt \
  --decoder-model-filename $repo/exp/decoder_jit_script.pt \
  --joiner-model-filename $repo/exp/joiner_jit_script.pt \
  $repo/test_wavs/DEV_T0000000000.wav \
  $repo/test_wavs/DEV_T0000000001.wav \
  $repo/test_wavs/DEV_T0000000002.wav
 # Decode the three test wavs from the epoch-99.pt checkpoint with
 # pretrained.py: greedy search for --max-sym-per-frame 1, 2 and 3, then the
 # three beam-search methods with --beam-size 4.
 for sym in 1 2 3; do
  log "Greedy search with --max-sym-per-frame $sym"
  ./pruned_transducer_stateless2/pretrained.py \
    --checkpoint $repo/exp/epoch-99.pt \
    --lang-dir $repo/data/lang_char \
    --decoding-method greedy_search \
    --max-sym-per-frame $sym \
    $repo/test_wavs/DEV_T0000000000.wav \
    $repo/test_wavs/DEV_T0000000001.wav \
    $repo/test_wavs/DEV_T0000000002.wav
 done
 for method in modified_beam_search beam_search fast_beam_search; do
  log "$method"
  ./pruned_transducer_stateless2/pretrained.py \
    --decoding-method $method \
    --beam-size 4 \
    --checkpoint $repo/exp/epoch-99.pt \
    --lang-dir $repo/data/lang_char \
    $repo/test_wavs/DEV_T0000000000.wav \
    $repo/test_wavs/DEV_T0000000001.wav \
    $repo/test_wavs/DEV_T0000000002.wav
 done
--- a/.github/scripts/test-ncnn-export.sh
+++ b/.github/scripts/test-ncnn-export.sh
@ -0,0 +1,230 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  # -e keeps backslash escapes in the message interpreted, as callers expect.
  echo -e "${stamp} (${origin}) $*"
 }
 # Build and install the ncnn Python wheel and the pnnx converter from source
 # inside egs/librispeech/ASR, then put the pnnx binary directory on PATH.
 pushd egs/librispeech/ASR
 log  "Install ncnn and pnnx"
 # We are using a modified ncnn here. Will try to merge it to the official repo
 # of ncnn
 git clone https://github.com/csukuangfj/ncnn
 pushd ncnn
 # Only the pybind11 submodule is checked out.
 git submodule init
 git submodule update python/pybind11
 python3 setup.py bdist_wheel
 ls -lh dist/
 pip install dist/*.whl
 cd tools/pnnx
 mkdir build
 cd build
 # Print which interpreter is handed to cmake below.
 echo "which python3"
 which python3
 #/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3
 cmake -D Python3_EXECUTABLE=$(which python3) ..
 make -j4 pnnx
 # Run the freshly built binary once; a non-zero exit is tolerated so that
 # `set -e` does not abort the script here.
 ./src/pnnx || echo "pass"
 # Undo `pushd ncnn` (and the two cd calls after it): back to the recipe dir.
 popd
 export PATH=$PWD/ncnn/tools/pnnx/build/src:$PATH
 # ncnn export test for conv_emformer_transducer_stateless2: fetch the
 # checkpoint, export with export-for-ncnn.py, convert the three traced
 # modules with pnnx, decode one wav with streaming-ncnn-decode.py, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
 # Clone without downloading LFS objects; only the needed checkpoint is pulled.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./conv_emformer_transducer_stateless2/export-for-ncnn.py \
  --exp-dir $repo/exp \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --num-encoder-layers 12 \
  --chunk-length 32 \
  --cnn-module-kernel 31 \
  --left-context-length 32 \
  --right-context-length 8 \
  --memory-size 32
 # Convert each traced module; the decode step below reads the resulting
 # *.ncnn.param / *.ncnn.bin files.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ncnn export test for lstm_transducer_stateless2: fetch the checkpoint,
 # export with export-for-ncnn.py, convert with pnnx, then decode one wav with
 # both streaming-ncnn-decode.py and ncnn-decode.py, and clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 # Clone without downloading LFS objects; only the needed checkpoint is pulled.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./lstm_transducer_stateless2/export-for-ncnn.py \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0
 # Convert each traced module; the decode steps below read the resulting
 # *.ncnn.param / *.ncnn.bin files.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 # Same model files and wav, through the non-streaming decode script.
 python3 ./lstm_transducer_stateless2/ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ncnn export test for pruned_transducer_stateless7_streaming: fetch the
 # checkpoint, export with export-for-ncnn.py, convert with pnnx, decode one
 # wav with streaming-ncnn-decode.py, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
 # Clone without downloading LFS objects; only the needed checkpoint is pulled.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained.pt epoch-99.pt
 popd
 # The lone "\" line inside the command only continues it; it visually
 # separates the checkpoint options from the model-architecture options.
 ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --exp-dir $repo/exp \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  \
  --decode-chunk-len 32 \
  --num-encoder-layers "2,4,3,2,4" \
  --feedforward-dims "1024,1024,2048,2048,1024" \
  --nhead "8,8,8,8,8" \
  --encoder-dims "384,384,384,384,384" \
  --attention-dims "192,192,192,192,192" \
  --encoder-unmasked-dims "256,256,256,256,256" \
  --zipformer-downsampling-factors "1,2,4,8,2" \
  --cnn-module-kernels "31,31,31,31,31" \
  --decoder-dim 512 \
  --joiner-dim 512
 # Convert each traced module; the decode step below reads the resulting
 # *.ncnn.param / *.ncnn.bin files.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/1089-134686-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ncnn export test for the Chinese-English mixed zipformer: fetch the lang
 # files and checkpoint, export with export-for-ncnn-zh.py, convert with pnnx,
 # decode one wav with streaming-ncnn-decode.py, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
 # Clone without downloading LFS objects; the files listed below are pulled
 # individually.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_char_bpe/L.pt"
 git lfs pull --include "data/lang_char_bpe/L_disambig.pt"
 git lfs pull --include "data/lang_char_bpe/Linv.pt"
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Alias the checkpoint as epoch-9999.pt so --epoch 9999 --avg 1 selects it.
 ln -s pretrained.pt epoch-9999.pt
 popd
 ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
  --tokens $repo/data/lang_char_bpe/tokens.txt \
  --exp-dir $repo/exp \
  --use-averaged-model 0 \
  --epoch 9999 \
  --avg 1 \
  --decode-chunk-len 32 \
  --num-encoder-layers "2,4,3,2,4" \
  --feedforward-dims "1024,1024,1536,1536,1024" \
  --nhead "8,8,8,8,8" \
  --encoder-dims "384,384,384,384,384" \
  --attention-dims "192,192,192,192,192" \
  --encoder-unmasked-dims "256,256,256,256,256" \
  --zipformer-downsampling-factors "1,2,4,8,2" \
  --cnn-module-kernels "31,31,31,31,31" \
  --decoder-dim 512 \
  --joiner-dim 512
 # Convert each traced module; the decode step below reads the resulting
 # *.ncnn.param / *.ncnn.bin files.
 pnnx $repo/exp/encoder_jit_trace-pnnx.pt
 pnnx $repo/exp/decoder_jit_trace-pnnx.pt
 pnnx $repo/exp/joiner_jit_trace-pnnx.pt
 python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
  --tokens $repo/data/lang_char_bpe/tokens.txt \
  --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
  --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
  --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
  --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
  --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
  --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
  $repo/test_wavs/0.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
--- a/.github/scripts/test-onnx-export.sh
+++ b/.github/scripts/test-onnx-export.sh
@ -0,0 +1,466 @@
 #!/usr/bin/env bash
 set -e
 log() {
  # Print "<timestamp> (<caller file>:<caller line>:<caller function>) <message>".
  # Adapted from espnet.
  local origin="${BASH_SOURCE[1]##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  local stamp
  stamp="$(date '+%Y-%m-%d %H:%M:%S')"
  # -e keeps backslash escapes in the message interpreted, as callers expect.
  echo -e "${stamp} (${origin}) $*"
 }
 # ONNX export test for the non-streaming LibriSpeech zipformer: fetch the
 # checkpoint, export a torchscript model and ONNX models, compare them with
 # onnx_check.py, decode one wav with onnx_pretrained.py, clean up.
 cd egs/librispeech/ASR
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone without downloading LFS objects; only the needed checkpoint is pulled.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained.pt epoch-99.pt
 popd
 log "Export via torch.jit.script()"
 ./zipformer/export.py \
  --use-averaged-model 0 \
  --exp-dir $repo/exp \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --jit 1
 log "Test export to ONNX format"
 ./zipformer/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --num-encoder-layers "2,2,3,4,3,2" \
  --downsampling-factor "1,2,4,8,4,2" \
  --feedforward-dim "512,768,1024,1536,1024,768" \
  --num-heads "4,4,4,8,4,4" \
  --encoder-dim "192,256,384,512,384,256" \
  --query-head-dim 32 \
  --value-head-dim 12 \
  --pos-head-dim 4 \
  --pos-dim 48 \
  --encoder-unmasked-dim "192,192,256,256,256,192" \
  --cnn-module-kernel "31,31,15,15,15,31" \
  --decoder-dim 512 \
  --joiner-dim 512 \
  --causal False \
  --chunk-size "16,32,64,-1" \
  --left-context-frames "64,128,256,-1"
 ls -lh $repo/exp
 log "Run onnx_check.py"
 # Takes the torchscript model and the three ONNX models as inputs.
 ./zipformer/onnx_check.py \
  --jit-filename $repo/exp/jit_script.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./zipformer/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav
 rm -rf $repo
 # ONNX export test for the streaming LibriSpeech zipformer: fetch the
 # checkpoint, export with export-onnx-streaming.py (causal, chunk size 16,
 # left context 64), decode one wav with onnx_pretrained-streaming.py, clean up.
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone without downloading LFS objects; only the needed checkpoint is pulled.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained.pt epoch-99.pt
 popd
 log "Test export streaming model to ONNX format"
 ./zipformer/export-onnx-streaming.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --num-encoder-layers "2,2,3,4,3,2" \
  --downsampling-factor "1,2,4,8,4,2" \
  --feedforward-dim "512,768,1024,1536,1024,768" \
  --num-heads "4,4,4,8,4,4" \
  --encoder-dim "192,256,384,512,384,256" \
  --query-head-dim 32 \
  --value-head-dim 12 \
  --pos-head-dim 4 \
  --pos-dim 48 \
  --encoder-unmasked-dim "192,192,256,256,256,192" \
  --cnn-module-kernel "31,31,15,15,15,31" \
  --decoder-dim 512 \
  --joiner-dim 512 \
  --causal True \
  --chunk-size 16 \
  --left-context-frames 64
 ls -lh $repo/exp
 log "Run onnx_pretrained-streaming.py"
 # The model file names carry the chunk size and left-context values used in
 # the export above.
 ./zipformer/onnx_pretrained-streaming.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ONNX export test for pruned_transducer_stateless7_streaming: fetch the BPE
 # model and checkpoint, export traced torchscript modules and ONNX models,
 # compare them with onnx_check.py, decode one wav, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone without downloading LFS objects; the files listed below are pulled
 # individually.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
  --bpe-model $repo/data/lang_bpe_500/bpe.model \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --decode-chunk-len 32 \
  --exp-dir $repo/exp/
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless7_streaming/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --decode-chunk-len 32 \
  --exp-dir $repo/exp/
 ls -lh $repo/exp
 log "Run onnx_check.py"
 # Takes the three traced torchscript modules and the three ONNX models.
 ./pruned_transducer_stateless7_streaming/onnx_check.py \
  --jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
  --jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
  --jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ONNX export test for pruned_transducer_stateless3: fetch the BPE model and
 # checkpoint, export a torchscript model and ONNX models, compare them with
 # onnx_check.py, decode three wavs with onnx_pretrained.py, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
 log "Downloading pre-trained model from $repo_url"
 git lfs install
 # Clone without downloading LFS objects; the files listed below are pulled
 # individually.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
 cd exp
 # Alias the checkpoint as epoch-9999.pt so --epoch 9999 --avg 1 selects it.
 ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
 popd
 log "Export via torch.jit.script()"
 ./pruned_transducer_stateless3/export.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 9999 \
  --avg 1 \
  --exp-dir $repo/exp/ \
  --jit 1
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless3/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 9999 \
  --avg 1 \
  --exp-dir $repo/exp/
 ls -lh $repo/exp
 log "Run onnx_check.py"
 # Takes the torchscript model and the three ONNX models as inputs.
 ./pruned_transducer_stateless3/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-9999-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless3/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ONNX export test for pruned_transducer_stateless5: fetch the BPE model and
 # checkpoint, export a torchscript model and ONNX models with the same model
 # dimensions, compare them with onnx_check.py, decode three wavs, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
 # Clone without downloading LFS objects; the files listed below are pulled
 # individually.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained-epoch-39-avg-7.pt epoch-99.pt
 popd
 log "Export via torch.jit.script()"
 ./pruned_transducer_stateless5/export.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --exp-dir $repo/exp \
  --num-encoder-layers 18 \
  --dim-feedforward 2048 \
  --nhead 8 \
  --encoder-dim 512 \
  --decoder-dim 512 \
  --joiner-dim 512 \
  --jit 1
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless5/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --epoch 99 \
  --avg 1 \
  --use-averaged-model 0 \
  --exp-dir $repo/exp \
  --num-encoder-layers 18 \
  --dim-feedforward 2048 \
  --nhead 8 \
  --encoder-dim 512 \
  --decoder-dim 512 \
  --joiner-dim 512
 ls -lh $repo/exp
 log "Run onnx_check.py"
 # Takes the torchscript model and the three ONNX models as inputs.
 ./pruned_transducer_stateless5/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless5/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ONNX export test for pruned_transducer_stateless7: fetch the checkpoint,
 # export a torchscript model and ONNX models, compare them with
 # onnx_check.py, decode three wavs with onnx_pretrained.py, clean up.
 #
 # The section previously opened with a leftover stanza (an empty `repo_url=`
 # followed by `rm -rf $repo` on a directory that was already removed) and
 # never deleted its own clone. The stanza is gone and the cleanup plus footer
 # now sit at the end of the section, like the other sections in this script.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
 # Clone without downloading LFS objects; only the needed checkpoint is pulled.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "exp/pretrained.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained.pt epoch-99.pt
 popd
 log "Export via torch.jit.script()"
 ./pruned_transducer_stateless7/export.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --feedforward-dims "1024,1024,2048,2048,1024" \
  --jit 1
 log "Test exporting to ONNX format"
 ./pruned_transducer_stateless7/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --feedforward-dims "1024,1024,2048,2048,1024"
 ls -lh $repo/exp
 log "Run onnx_check.py"
 # Takes the torchscript model and the three ONNX models as inputs.
 ./pruned_transducer_stateless7/onnx_check.py \
  --jit-filename $repo/exp/cpu_jit.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./pruned_transducer_stateless7/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1089-134686-0001.wav \
  $repo/test_wavs/1221-135766-0001.wav \
  $repo/test_wavs/1221-135766-0002.wav
 # Remove this section's clone so it does not stay on disk while the
 # remaining models are downloaded.
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ONNX export test for conv_emformer_transducer_stateless2: fetch the BPE
 # model and checkpoint, export ONNX models, decode one wav with
 # onnx_pretrained.py, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
 # Clone without downloading LFS objects; the files listed below are pulled
 # individually.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
 popd
 log "Test exporting to ONNX format"
 ./conv_emformer_transducer_stateless2/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp \
  --num-encoder-layers 12 \
  --chunk-length 32 \
  --cnn-module-kernel 31 \
  --left-context-length 32 \
  --right-context-length 8 \
  --memory-size 32
 log "Run onnx_pretrained.py"
 ./conv_emformer_transducer_stateless2/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1221-135766-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
 # ONNX export test for lstm_transducer_stateless2: fetch the BPE model and
 # checkpoint, export traced torchscript modules and ONNX models, compare
 # them with onnx_check.py, decode one wav with onnx_pretrained.py, clean up.
 log "=========================================================================="
 repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
 # Clone without downloading LFS objects; the files listed below are pulled
 # individually.
 GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
 repo=$(basename $repo_url)
 pushd $repo
 git lfs pull --include "data/lang_bpe_500/bpe.model"
 git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
 cd exp
 # Alias the checkpoint as epoch-99.pt so --epoch 99 --avg 1 selects it.
 ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
 popd
 log "Export via torch.jit.trace()"
 ./lstm_transducer_stateless2/export.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp/ \
  --jit-trace 1
 log "Test exporting to ONNX format"
 ./lstm_transducer_stateless2/export-onnx.py \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  --use-averaged-model 0 \
  --epoch 99 \
  --avg 1 \
  --exp-dir $repo/exp
 ls -lh $repo/exp
 log "Run onnx_check.py"
 # Takes the three traced torchscript modules and the three ONNX models.
 ./lstm_transducer_stateless2/onnx_check.py \
  --jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
  --jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
  --jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
  --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
 log "Run onnx_pretrained.py"
 ./lstm_transducer_stateless2/onnx_pretrained.py \
  --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
  --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
  --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
  --tokens $repo/data/lang_bpe_500/tokens.txt \
  $repo/test_wavs/1221-135766-0001.wav
 rm -rf $repo
 log "--------------------------------------------------------------------------"
--- a/.github/scripts/wenetspeech/ASR/run_rknn.sh
+++ b/.github/scripts/wenetspeech/ASR/run_rknn.sh
@ -0,0 +1,196 @@
 #!/usr/bin/env bash
 set -ex
 python3 -m pip install kaldi-native-fbank soundfile librosa
 log() {
  # Logger adapted from espnet. Output format:
  #   <date time> (<caller file>:<caller line>:<caller function>) <message>
  local caller_file caller_line caller_func
  caller_file=${BASH_SOURCE[1]##*/}
  caller_line=${BASH_LINENO[0]}
  caller_func=${FUNCNAME[1]}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
 }
 cd egs/wenetspeech/ASR
 # Export the small streaming zipformer for WenetSpeech to ONNX, then convert
 # the ONNX files to RKNN once per target platform and archive each result.
 # Model page:
 # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-small-chinese
 function export_2025_03_02() {
  local hf=https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main
  local onnx_suffix=epoch-99-avg-1-chunk-32-left-128.onnx
  local i
  local -a onnx_args rknn_args
  d=exp_2025_03_02
  mkdir $d
  # Download tokens, checkpoint and three test waves into the work directory.
  pushd $d
  curl -SL -O $hf/data/lang_char/tokens.txt
  curl -SL -O $hf/exp/pretrained.pt
  # presumably renamed so that `--epoch 99` below picks it up
  mv pretrained.pt epoch-99.pt
  for i in 0 1 2; do
    curl -SL -o $i.wav $hf/test_wavs/DEV_T000000000$i.wav
  done
  ls -lh
  popd
  onnx_args=(
    --dynamic-batch 0
    --enable-int8-quantization 0
    --tokens $d/tokens.txt
    --use-averaged-model 0
    --epoch 99
    --avg 1
    --exp-dir $d
    --use-ctc 0
    --use-transducer 1
    --num-encoder-layers 2,2,2,2,2,2
    --feedforward-dim 512,768,768,768,768,768
    --encoder-dim 192,256,256,256,256,256
    --encoder-unmasked-dim 192,192,192,192,192,192
    --chunk-size 32
    --left-context-frames 128
    --causal 1
  )
  ./zipformer/export-onnx-streaming.py "${onnx_args[@]}"
  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-small-zh-2025-03-02
    mkdir -p $dst
    rknn_args=(
      --in-encoder $d/encoder-$onnx_suffix
      --in-decoder $d/decoder-$onnx_suffix
      --in-joiner $d/joiner-$onnx_suffix
      --out-encoder $dst/encoder.rknn
      --out-decoder $dst/decoder.rknn
      --out-joiner $dst/joiner.rknn
      --target-platform $platform
    )
    ./zipformer/export_rknn_transducer_streaming.py "${rknn_args[@]}"
    # Bundle tokens and test waves with the converted model.
    cp $d/tokens.txt $dst
    mkdir $dst/test_wavs
    cp $d/*.wav $dst/test_wavs
    tar cjvf $dst.tar.bz2 $dst
    ls -lh $dst.tar.bz2
    mv $dst.tar.bz2 /icefall/
    ls -lh $dst/
    echo "---"
    rm -rf $dst
  done
  rm -rf $d
 }
 # Export the streaming zipformer for WenetSpeech to ONNX, then convert the
 # ONNX files to RKNN once per target platform. Each platform yields
 # sherpa-onnx-<platform>-streaming-zipformer-zh-2025-03-03.tar.bz2, which is
 # moved to /icefall/.
 # Model page:
 # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-large-chinese
 # NOTE(review): the download URLs are the same ones export_2023_06_15 uses;
 # confirm that both exports are meant to start from the same checkpoint.
 function export_2025_03_03() {
  local d=exp_2025_03_03
  local hf=https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main
  local onnx_suffix=epoch-99-avg-1-chunk-32-left-128.onnx
  local dst platform
  mkdir "$d"
  pushd "$d"
  curl -SL -O "$hf/data/lang_char/tokens.txt"
  curl -SL -O "$hf/exp/pretrained.pt"
  # presumably renamed so that `--epoch 99` below picks it up
  mv pretrained.pt epoch-99.pt
  curl -SL -o 0.wav "$hf/test_wavs/DEV_T0000000000.wav"
  curl -SL -o 1.wav "$hf/test_wavs/DEV_T0000000001.wav"
  curl -SL -o 2.wav "$hf/test_wavs/DEV_T0000000002.wav"
  ls -lh
  popd
  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1
  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-zh-2025-03-03
    mkdir -p "$dst"
    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-$onnx_suffix" \
      --in-decoder "$d/decoder-$onnx_suffix" \
      --in-joiner "$d/joiner-$onnx_suffix" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"
    # The archive now lives in /icefall/. Listing it from the current
    # directory again would fail and, under `set -e`, abort the whole script.
    rm -rf "$dst"
  done
  rm -rf "$d"
 }
 # Export the streaming zipformer for WenetSpeech (2023-06-15 checkpoint) to
 # ONNX, then convert the ONNX files to RKNN once per target platform. Each
 # platform yields sherpa-onnx-<platform>-streaming-zipformer-zh-2023-06-15.tar.bz2,
 # which is moved to /icefall/.
 function export_2023_06_15() {
  local d=exp_2023_06_15
  local hf=https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main
  local onnx_suffix=epoch-99-avg-1-chunk-32-left-128.onnx
  local dst platform
  mkdir "$d"
  pushd "$d"
  curl -SL -O "$hf/data/lang_char/tokens.txt"
  curl -SL -O "$hf/exp/pretrained.pt"
  # presumably renamed so that `--epoch 99` below picks it up
  mv pretrained.pt epoch-99.pt
  curl -SL -o 0.wav "$hf/test_wavs/DEV_T0000000000.wav"
  curl -SL -o 1.wav "$hf/test_wavs/DEV_T0000000001.wav"
  curl -SL -o 2.wav "$hf/test_wavs/DEV_T0000000002.wav"
  ls -lh
  popd
  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens "$d/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$d" \
    --use-ctc 0 \
    --use-transducer 1 \
    \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1
  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-zh-2023-06-15
    mkdir -p "$dst"
    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder "$d/encoder-$onnx_suffix" \
      --in-decoder "$d/decoder-$onnx_suffix" \
      --in-joiner "$d/joiner-$onnx_suffix" \
      --out-encoder "$dst/encoder.rknn" \
      --out-decoder "$dst/decoder.rknn" \
      --out-joiner "$dst/joiner.rknn" \
      --target-platform "$platform"
    cp "$d/tokens.txt" "$dst"
    mkdir "$dst/test_wavs"
    cp "$d"/*.wav "$dst/test_wavs"
    tar cjvf "$dst.tar.bz2" "$dst"
    ls -lh "$dst.tar.bz2"
    mv "$dst.tar.bz2" /icefall/
    ls -lh "$dst/"
    echo "---"
    # The archive now lives in /icefall/. Listing it from the current
    # directory again would fail and, under `set -e`, abort the whole script.
    rm -rf "$dst"
  done
  # Remove the work directory, as the other export functions in this script do.
  rm -rf "$d"
 }
 export_2025_03_02
 export_2025_03_03
 export_2023_06_15
--- a/.github/scripts/yesno/ASR/run.sh
+++ b/.github/scripts/yesno/ASR/run.sh
@ -0,0 +1,86 @@
 #!/usr/bin/env bash
 set -ex
 log() {
  # Logger adapted from espnet. Output format:
  #   <date time> (<caller file>:<caller line>:<caller function>) <message>
  local caller_file caller_line caller_func
  caller_file=${BASH_SOURCE[1]##*/}
  caller_line=${BASH_LINENO[0]}
  caller_func=${FUNCNAME[1]}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${caller_file}:${caller_line}:${caller_func}) $*"
 }
 cd egs/yesno/ASR
 # Arguments shared by several of the checks below.
 hlg_args=(
  --HLG ./data/lang_phone/HLG.pt
  --words-file ./data/lang_phone/words.txt
 )
 two_waves=(
  download/waves_yesno/0_0_0_1_0_0_0_1.wav
  download/waves_yesno/0_0_1_0_0_0_1_0.wav
 )
 three_waves=(
  ./download/waves_yesno/0_0_0_1_0_0_0_1.wav
  ./download/waves_yesno/0_0_1_0_0_0_1_0.wav
  ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
 )
 log "data preparation"
 ./prepare.sh
 log "training"
 python3 ./tdnn/train.py
 log "decoding"
 python3 ./tdnn/decode.py
 log "export to pretrained.pt"
 python3 ./tdnn/export.py --epoch 14 --avg 2
 python3 ./tdnn/pretrained.py \
  --checkpoint ./tdnn/exp/pretrained.pt \
  "${hlg_args[@]}" \
  "${two_waves[@]}"
 log "Test exporting to torchscript"
 python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
 python3 ./tdnn/jit_pretrained.py \
  --nn-model ./tdnn/exp/cpu_jit.pt \
  "${hlg_args[@]}" \
  "${two_waves[@]}"
 log "Test exporting to onnx"
 python3 ./tdnn/export_onnx.py --epoch 14 --avg 2
 log "Test float32 model"
 python3 ./tdnn/onnx_pretrained.py \
  --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
  "${hlg_args[@]}" \
  "${two_waves[@]}"
 log "Test int8 model"
 python3 ./tdnn/onnx_pretrained.py \
  --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
  "${hlg_args[@]}" \
  "${two_waves[@]}"
 # The torchscript export is repeated before each of the next two checks.
 log "Test decoding with H"
 python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
 python3 ./tdnn/jit_pretrained_decode_with_H.py \
    --nn-model ./tdnn/exp/cpu_jit.pt \
    --H ./data/lang_phone/H.fst \
    --tokens ./data/lang_phone/tokens.txt \
    "${three_waves[@]}"
 log "Test decoding with HL"
 python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
 python3 ./tdnn/jit_pretrained_decode_with_HL.py \
    --nn-model ./tdnn/exp/cpu_jit.pt \
    --HL ./data/lang_phone/HL.fst \
    --words ./data/lang_phone/words.txt \
    "${three_waves[@]}"
 log "Show generated files"
 ls -lh tdnn/exp
 ls -lh data/lang_phone
--- a/.github/workflows/aishell.yml
+++ b/.github/workflows/aishell.yml
@ -0,0 +1,72 @@
 name: aishell
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: aishell-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Computes the JSON build matrix that jobs declaring
  # `needs: generate_build_matrix` expand with fromJson().
  generate_build_matrix:
    # Only run for these two repository owners.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          # `::set-output` is deprecated; step outputs are written to $GITHUB_OUTPUT.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  # Runs the aishell CI script inside the prebuilt CPU docker image, once per
  # entry of the matrix produced by generate_build_matrix.
  aishell:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Deletes /opt/hostedtoolcache and prints disk usage before and after.
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      # The checked-out workspace is mounted at /icefall inside the container.
      - name: Run aishell tests
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
              .github/scripts/aishell/ASR/run.sh
--- a/.github/workflows/audioset.yml
+++ b/.github/workflows/audioset.yml
@ -0,0 +1,137 @@
 name: audioset
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: audioset-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Computes the JSON build matrix that jobs declaring
  # `needs: generate_build_matrix` expand with fromJson().
  generate_build_matrix:
    # Only run for these two repository owners.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          # `::set-output` is deprecated; step outputs are written to $GITHUB_OUTPUT.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  # Runs the audioset CI script inside the prebuilt CPU docker image, then, for
  # one matrix entry on push events, publishes the files found in ./model-onnx
  # to Hugging Face and to a GitHub release.
  audioset:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Deletes /opt/hostedtoolcache and prints disk usage before and after.
      - name: Free space
        shell: bash
        run: |
          ls -lh
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      # The checked-out workspace is mounted at /icefall inside the container.
      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
              .github/scripts/audioset/AT/run.sh
      # Gives ./model-onnx to the runner user before listing its contents.
      # presumably the directory is created by the container step above; confirm
      - name: Show model files
        shell: bash
        run: |
          sudo chown -R runner ./model-onnx
          ls -lh ./model-onnx
          chmod -x ./model-onnx/class_labels_indices.csv
          echo "----------"
          ls -lh ./model-onnx/*
      # Only for the py3.10 / torch 2.3.0 matrix entry on push events.
      # The command is retried up to 20 times; its final `git push` is
      # followed by `|| true`, so a failed push does not fail the command.
      - name: Upload model to huggingface
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp ../model-onnx/*.onnx ./
            cp ../model-onnx/*.csv ./
            cp -a ../model-onnx/test_wavs ./
            ls -lh
            git add .
            git status
            git commit -m "update models"
            git status
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true
            rm -rf huggingface
      # Renames ./model-onnx to the release name and packs it as .tar.bz2.
      - name: Prepare for release
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        shell: bash
        run: |
          d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
          mv ./model-onnx $d
          tar cjvf ${d}.tar.bz2 $d
          ls -lh
      # Uploads every sherpa-onnx-*.tar.bz2 to the `audio-tagging-models`
      # release tag of k2-fsa/sherpa-onnx, overwriting existing assets.
      - name: Release exported onnx models
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: audio-tagging-models
--- a/.github/workflows/baker_zh.yml
+++ b/.github/workflows/baker_zh.yml
@ -0,0 +1,152 @@
 name: baker_zh
 on:
  push:
    branches:
      - master
      - baker-matcha-2
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: baker-zh-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Computes the JSON build matrix that jobs declaring
  # `needs: generate_build_matrix` expand with fromJson().
  generate_build_matrix:
    # Only run for these two repository owners.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          # `::set-output` is deprecated; step outputs are written to $GITHUB_OUTPUT.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  # Runs the baker_zh matcha TTS CI script inside the prebuilt CPU docker
  # image, uploads generated files as artifacts and, for one matrix entry on
  # push events, publishes the model to Hugging Face and to a GitHub release.
  baker_zh:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Deletes /opt/hostedtoolcache and prints disk usage before and after.
      - name: Free space
        shell: bash
        run: |
          ls -lh
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      # The checked-out workspace is mounted at /icefall inside the container;
      # onnx is pinned to 1.17.0 before the script runs.
      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              pip install onnx==1.17.0
              pip list
              git config --global --add safe.directory /icefall
              .github/scripts/baker_zh/TTS/run-matcha.sh
      - name: display files
        shell: bash
        run: |
          ls -lh
      # The artifact uploads below run only for the py3.10 / torch 2.3.0 entry.
      # All *.wav files found in the workspace root.
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
          path: ./*.wav
      # One artifact per model-steps-N.onnx file, N = 2..6.
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-2
          path: ./model-steps-2.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-3
          path: ./model-steps-3.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-4
          path: ./model-steps-4.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-5
          path: ./model-steps-5.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-6
          path: ./model-steps-6.onnx
      # Push events only. The commit-and-push line ends with `|| true`, so a
      # failed commit or push does not fail the step.
      - name: Upload models to huggingface
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          d=matcha-icefall-zh-baker
          GIT_LFS_SKIP_SMUDGE=1  git clone https://huggingface.co/csukuangfj/$d hf
          cp -av $d/* hf/
          pushd hf
          git add .
          git config --global user.name "csukuangfj"
          git config --global user.email "csukuangfj@gmail.com"
          git config --global lfs.allowincompletepush true
          git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
          popd
      # Uploads every matcha-icefall-*.tar.bz2 to the `tts-models` release tag
      # of k2-fsa/sherpa-onnx, overwriting existing assets.
      # presumably the archives are produced by run-matcha.sh; confirm
      - name: Release exported onnx models
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: matcha-icefall-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
--- a/.github/workflows/build-cpu-docker.yml
+++ b/.github/workflows/build-cpu-docker.yml
@ -0,0 +1,81 @@
 name: build-cpu-docker
 on:
  workflow_dispatch:
 concurrency:
  group: build-cpu-docker-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Computes the JSON build matrix that jobs declaring
  # `needs: generate_build_matrix` expand with fromJson().
  generate_build_matrix:
    # Only run for these two repository owners.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
          # `::set-output` is deprecated; step outputs are written to $GITHUB_OUTPUT.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  # Builds one CPU docker image per matrix entry and pushes it to ghcr.io
  # under the tag cpu-py<python>-torch<torch>-v<version>.
  build-cpu-docker:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Deletes /opt/hostedtoolcache and prints disk usage before and after.
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
      # Authenticates against ghcr.io with the workflow's GITHUB_TOKEN.
      - name: 'Login to GitHub Container Registry'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Builds from .github/scripts/docker, passing the matrix versions as
      # build args, then pushes the resulting tag.
      - name: Build docker Image
        shell: bash
        run: |
          cd .github/scripts/docker
          torch_version=${{ matrix.torch-version }}
          torchaudio_version=${{ matrix.torchaudio-version }}
          echo "torch_version: $torch_version"
          echo "torchaudio_version: $torchaudio_version"
          version=${{ matrix.version }}
          tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
          echo "tag: $tag"
          docker build \
            -t $tag \
            --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
            --build-arg TORCH_VERSION=$torch_version \
            --build-arg TORCHAUDIO_VERSION=$torchaudio_version \
            --build-arg K2_VERSION=${{ matrix.k2-version }} \
            --build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \
            .
          docker image ls
          docker push $tag
--- a/.github/workflows/build-doc.yml
+++ b/.github/workflows/build-doc.yml
@ -0,0 +1,74 @@
 # Copyright      2022  Xiaomi Corp.       (author: Fangjun Kuang)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # refer to https://github.com/actions/starter-workflows/pull/47/files
 # You can access it at https://k2-fsa.github.io/icefall/
 name: Generate doc
 on:
  push:
    branches:
    - master
    - doc
  pull_request:
    types: [labeled]
  workflow_dispatch:
 concurrency:
  group: build_doc-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Builds the HTML documentation under docs/ and publishes docs/build/html to
  # the gh-pages branch.
  # NOTE(review): the job-level `if:` below is commented out and the Deploy
  # step has no guard of its own, so it also runs for labeled pull requests;
  # confirm that this is intended.
  build-doc:
    # if: github.event.label.name == 'doc' || github.event_name == 'push'
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]
    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # setup-python@v5 replaces the outdated @v2 and matches the version
      # used by the other workflows in this repository.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      # Generates the piper_phonemize page, builds the docs with `make html`,
      # and copies ../piper_phonemize.html into the HTML output directory.
      - name: Build doc
        shell: bash
        run: |
          .github/scripts/generate-piper-phonemize-page.py
          cd docs
          python3 -m pip install -r ./requirements.txt
          make html
          touch build/html/.nojekyll
          cp -v ../piper_phonemize.html ./build/html/
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./docs/build/html
          publish_branch: gh-pages
--- a/.github/workflows/build-docker-image.yml
+++ b/.github/workflows/build-docker-image.yml
@ -0,0 +1,84 @@
 # see also
 # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
 name: Build docker image
 on:
  workflow_dispatch:
 concurrency:
  group: build_docker-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  # Builds one docker image per entry of the `image` matrix from
  # ./docker/<image>.dockerfile and pushes it as k2fsa/icefall:<image>.
  build-docker-image:
    name: ${{ matrix.image }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        image: ["torch2.4.1-cuda12.4", "torch2.4.1-cuda12.1", "torch2.4.1-cuda11.8", "torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
    steps:
      # refer to https://github.com/actions/checkout
      # checkout@v4 replaces the outdated @v2 and matches the other workflows.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Moves the selected dockerfile to ./Dockerfile for the build step below.
      - name: Rename
        shell: bash
        run: |
          image=${{ matrix.image }}
          mv -v ./docker/$image.dockerfile ./Dockerfile
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
      - name: Free more space
        shell: bash
        run: |
          # https://github.com/orgs/community/discussions/25678
          cd /opt
          find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: false
          swap-storage: true
      - name: Check space
        shell: bash
        run: |
          df -h
      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Build and push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: k2fsa/icefall:${{ matrix.image }}
--- a/.github/workflows/ksponspeech.yml
+++ b/.github/workflows/ksponspeech.yml
@ -0,0 +1,167 @@
 name: ksponspeech
 on:
  push:
    branches:
      - ksponspeech
  workflow_dispatch:
 jobs:
  ksponspeech:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # Restores ~/tmp/kaldifeat from the cache; the install step that follows
      # is skipped when `cache-hit` is 'true'.
      # actions/cache@v2 has been retired by GitHub; v4 accepts the same
      # `path`/`key` inputs and exposes the same `cache-hit` output.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Test
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/ksponspeech/ASR/run.sh
      - name: Show model files (2024-06-24)
        shell: bash
        run: |
          src=/tmp/model-2024-06-24
          ls -lh $src
      - name: Show model files (2024-06-16)
        shell: bash
        run: |
          src=/tmp/model-2024-06-16
          ls -lh $src
      - name: Upload model to huggingface (2024-06-24)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            src=/tmp/model-2024-06-24
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf hf
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 hf
            cd hf
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp -av $src/* ./
            ls -lh
            git lfs track "bpe.model"
            git lfs track "*.onnx"
            git add .
            git status
            git commit -m "update models"
            git status
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 main || true
            rm -rf hf
      - name: Upload model to huggingface (2024-06-16)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            src=/tmp/model-2024-06-16
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf hf
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
            cd hf
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp -v $src/* ./
            ls -lh
            git lfs track "bpe.model"
            git lfs track "*.onnx"
            cp -av test_wavs $src/
            git add .
            git status
            git commit -m "update models"
            git status
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
            rm -rf hf
      - name: Prepare for release (2024-06-16)
        shell: bash
        run: |
          src=/tmp/model-2024-06-16
          d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
          mv $src ./$d
          tar cjvf ${d}.tar.bz2 $d
          ls -lh
      - name: Prepare for release (2024-06-24)
        shell: bash
        run: |
          src=/tmp/model-2024-06-24
          d=sherpa-onnx-zipformer-korean-2024-06-24
          mv $src ./$d
          tar cjvf ${d}.tar.bz2 $d
          ls -lh
      - name: Release exported onnx models
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
--- a/.github/workflows/librispeech.yml
+++ b/.github/workflows/librispeech.yml
@ -0,0 +1,72 @@
 name: librispeech
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: librispeech-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # Computes the build matrix and exposes it as the job output `matrix`,
    # which the `librispeech` job reads through
    # `needs.generate_build_matrix.outputs.matrix`.
    # Runs only when the repository owner is csukuangfj or k2-fsa.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
          # Publish the step output through the $GITHUB_OUTPUT file; the
          # `::set-output` workflow command is deprecated.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  librispeech:
    # Runs the LibriSpeech CI script once per entry of the matrix produced by
    # the generate_build_matrix job.
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      # Keep the other matrix entries running when one of them fails.
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      # refer to https://github.com/actions/checkout
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Delete /opt/hostedtoolcache and print disk usage before and after,
      # followed by the working directory and the workspace path.
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      # Run .github/scripts/librispeech/ASR/run.sh inside the icefall CPU image
      # selected by the matrix entry; the workspace is mounted at /icefall.
      - name: Test zipformer/train.py with LibriSpeech
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
              .github/scripts/librispeech/ASR/run.sh
--- a/.github/workflows/ljspeech.yml
+++ b/.github/workflows/ljspeech.yml
@ -0,0 +1,166 @@
 name: ljspeech
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: ljspeech-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    # Computes the build matrix and exposes it as the job output `matrix`,
    # which the `ljspeech` job reads through
    # `needs.generate_build_matrix.outputs.matrix`.
    # Runs only when the repository owner is csukuangfj or k2-fsa.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          # Publish the step output through the $GITHUB_OUTPUT file; the
          # `::set-output` workflow command is deprecated.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  ljspeech:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Free space
        shell: bash
        run: |
          ls -lh
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
              pip install "matplotlib<=3.9.4"
              pip list
              .github/scripts/ljspeech/TTS/run-matcha.sh
              .github/scripts/ljspeech/TTS/run.sh
      - name: display files
        shell: bash
        run: |
          ls -lh
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
          path: ./*.wav
      - name: Release exported onnx models
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: vits-icefall-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-2
          path: ./model-steps-2.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-3
          path: ./model-steps-3.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-4
          path: ./model-steps-4.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-5
          path: ./model-steps-5.onnx
      - uses: actions/upload-artifact@v4
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        with:
          name: step-6
          path: ./model-steps-6.onnx
      # Copy the contents of ./matcha-icefall-en_US-ljspeech into a fresh clone of
      # the Hugging Face repository of the same name and push the result.
      # Runs only for the python 3.10 / torch 2.3.0 matrix entry.
      # The clone skips downloading LFS objects (GIT_LFS_SKIP_SMUDGE=1).
      # The trailing `|| true` makes the commit/push best-effort: a failure of
      # either command does not fail the step.
      - name: Upload models to huggingface
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        shell: bash
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          d=matcha-icefall-en_US-ljspeech
          GIT_LFS_SKIP_SMUDGE=1  git clone https://huggingface.co/csukuangfj/$d hf
          cp -av $d/* hf/
          pushd hf
          git lfs track "cmn_dict"
          git lfs track "ru_dict"
          git add .
          git config --global user.name "csukuangfj"
          git config --global user.email "csukuangfj@gmail.com"
          git config --global lfs.allowincompletepush true
          git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
          popd
      - name: Release exported onnx models
        if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: matcha-icefall-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: tts-models
--- a/.github/workflows/multi-zh-hans.yml
+++ b/.github/workflows/multi-zh-hans.yml
@ -0,0 +1,86 @@
 name: multi-zh-hans
 on:
  push:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: multi-zh-hans-${{ github.ref }}
  cancel-in-progress: true
 permissions:
  contents: write
 jobs:
  generate_build_matrix:
    # Computes the build matrix (torch 2.7.0, python 3.11) and exposes it as the
    # job output `matrix`, which the `multi-zh-hans` job reads through
    # `needs.generate_build_matrix.outputs.matrix`.
    # Runs only when the repository owner is csukuangfj or k2-fsa.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11")
          # Publish the step output through the $GITHUB_OUTPUT file; the
          # `::set-output` workflow command is deprecated.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
  multi-zh-hans:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      # Run .github/scripts/multi_zh-hans/ASR/run.sh inside the icefall CPU image
      # selected by the matrix entry; the workspace is mounted at /icefall.
      # NOTE(review): the HF_TOKEN secret is substituted directly into the script
      # text below (unquoted) rather than passed as an environment variable —
      # confirm this is acceptable for the token format in use.
      - name: Test with multi_zh-hans
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              export HF_TOKEN=${{ secrets.HF_TOKEN }}
              cd /icefall
              git config --global --add safe.directory /icefall
              .github/scripts/multi_zh-hans/ASR/run.sh
      - name: Show models
        shell: bash
        run: |
          ls -lh *.tar.bz2
      - name: upload model to https://github.com/k2-fsa/sherpa-onnx
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
--- a/.github/workflows/rknn.yml
+++ b/.github/workflows/rknn.yml
@ -0,0 +1,134 @@
 name: rknn
 on:
  push:
    branches:
      - master
      - rknn-zipformer2
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: rknn-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  rknn:
    name: RKNN ${{ matrix.recipe }} ${{ matrix.rknn_toolkit2_version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10"]
        k2-version: ["1.24.4.dev20241029"]
        kaldifeat-version: ["1.25.5.dev20241029"]
        torch-version: ["2.0.0"]
        torchaudio-version: ["2.0.1"]
        version: ["20241218"]
        # recipe: ["librispeech", "wenetspeech", "multi_zh-hans"]
        recipe: ["librispeech"]
        rknn_toolkit2_version: ["2.2.0", "2.1.0"]
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Export RKNN models inside the icefall CPU image, with the workspace
      # mounted at /icefall. The script:
      #   1. prints OS / Python / torch / k2 / pip information for debugging;
      #   2. downloads the rknn-toolkit2 wheel for the matrix version (the 2.1.0
      #      wheel when the version is "2.1.0", otherwise the 2.2.0 wheel);
      #   3. installs the wheel together with numpy<=1.26.4;
      #   4. runs .github/scripts/<recipe>/ASR/run_rknn.sh with stdout and stderr
      #      redirected to log-<recipe>.txt.
      # The trailing `|| true` ignores the exit status of run_rknn.sh, so a failed
      # export does not fail this step.
      - name: Export RKNN model
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              cat /etc/*release
              lsb_release -a
              uname -a
              python3 --version
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
              python3 -m torch.utils.collect_env
              python3 -m k2.version
              pip list
              export rknn_toolkit2_version=${{ matrix.rknn_toolkit2_version }}
              if [[ $rknn_toolkit2_version == "2.1.0" ]]; then
                # for the folder pruned_transducer_stateless7_streaming
                curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
              else
                # for the folder zipformer/
                curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
              fi
              # Install rknn
              pip install ./*.whl "numpy<=1.26.4"
              pip list | grep rknn
              echo "---"
              pip list
              echo "---"
              recipe=${{ matrix.recipe }}
              .github/scripts/$recipe/ASR/run_rknn.sh > log-$recipe.txt 2>&1 || true
      - uses: actions/upload-artifact@v4
        with:
          name: log-${{ matrix.recipe }}-${{ matrix.rknn_toolkit2_version }}
          path: ./log-*.txt
      - name: Display results
        shell: bash
        run: |
          ls -lh *rk*.tar.bz2 || true
      - name: Release to GitHub
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
      # Push the generated *rk*.tar.bz2 archives to the Hugging Face repository
      # csukuangfj/sherpa-onnx-rknn-models, under the streaming-asr/ directory.
      # Runs only for `push` and `workflow_dispatch` events.
      # The command is wrapped in nick-fields/retry (up to 20 attempts, 200 s
      # timeout each).
      # The clone skips downloading LFS objects (GIT_LFS_SKIP_SMUDGE=1).
      # Both the archive copy and the final push end in `|| true`, so a missing
      # archive or a rejected push does not fail the command.
      - name: Upload model to huggingface
        if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1
            git clone https://huggingface.co/csukuangfj/sherpa-onnx-rknn-models huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            dst=streaming-asr
            mkdir -p $dst
            cp ../*rk*.tar.bz2 $dst/ || true
            ls -lh $dst
            git add .
            git status
            git commit -m "update models"
            git status
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-rknn-models main || true
            rm -rf huggingface
--- a/.github/workflows/run-docker-image.yml
+++ b/.github/workflows/run-docker-image.yml
@ -0,0 +1,144 @@
 name: Run docker image
 on:
  workflow_dispatch:
 concurrency:
  group: run_docker_image-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run-docker-image:
    name: ${{ matrix.image }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        image: ["torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
    steps:
      # refer to https://github.com/actions/checkout
      # Fetch the full history (fetch-depth: 0). Uses checkout@v4 instead of the
      # outdated v2 release.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
      - name: Free more space
        shell: bash
        run: |
          # https://github.com/orgs/community/discussions/25678
          cd /opt
          find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@main
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false
          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: false
          swap-storage: true
      - name: Check space
        shell: bash
        run: |
          df -h
      # Smoke-test the published k2fsa/icefall image selected by the matrix entry:
      # print system and CUDA information, extend LD_LIBRARY_PATH with the CUDA
      # compat directories, print the versions of torch / k2 / lhotse / kaldifeat,
      # then prepare, train and decode the yesno recipe.
      # The `find -name` patterns are quoted so that the shell passes them to
      # `find` verbatim instead of glob-expanding them in the current directory.
      - name: Run the build process with Docker
        uses: addnab/docker-run-action@v3
        with:
            image: k2fsa/icefall:${{ matrix.image }}
            shell: bash
            run: |
              uname -a
              cat /etc/*release
              find / -name 'libcuda*' 2>/dev/null
              ls -lh /usr/local/
              ls -lh /usr/local/cuda*
              nvcc --version
              ls -lh /usr/local/cuda-*/compat/*
              # For torch1.9.0-cuda10.2
              export LD_LIBRARY_PATH=/usr/local/cuda-10.2/compat:$LD_LIBRARY_PATH
              # For torch1.12.1-cuda11.3
              export LD_LIBRARY_PATH=/usr/local/cuda-11.3/compat:$LD_LIBRARY_PATH
              # For torch2.0.0-cuda11.7
              export LD_LIBRARY_PATH=/usr/local/cuda-11.7/compat:$LD_LIBRARY_PATH
              # For torch2.1.0-cuda11.8
              export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH
              # For torch2.1.0-cuda12.1
              export LD_LIBRARY_PATH=/usr/local/cuda-12.1/compat:$LD_LIBRARY_PATH
              which nvcc
              cuda_dir=$(dirname "$(which nvcc)")
              echo "cuda_dir: $cuda_dir"
              find "$cuda_dir" -name 'libcuda.so*'
              echo "--------------------"
              find / -name 'libcuda.so*' 2>/dev/null
              # for torch1.13.0-cuda11.6
              if [ -e /opt/conda/lib/stubs/libcuda.so ]; then
                cd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && cd -
                export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH
              fi
              find / -name 'libcuda.so*' 2>/dev/null
              echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
              python3 --version
              which python3
              python3 -m pip list
              echo "----------torch----------"
              python3 -m torch.utils.collect_env
              echo "----------k2----------"
              python3 -c "import k2; print(k2.__file__)"
              python3 -c "import k2; print(k2.__dev_version__)"
              python3 -m k2.version
              echo "----------lhotse----------"
              python3 -c "import lhotse; print(lhotse.__file__)"
              python3 -c "import lhotse; print(lhotse.__version__)"
              echo "----------kaldifeat----------"
              python3 -c "import kaldifeat; print(kaldifeat.__file__)"
              python3 -c "import kaldifeat; print(kaldifeat.__version__)"
              echo "Test yesno recipe"
              cd egs/yesno/ASR
              ./prepare.sh
              ./tdnn/train.py
              ./tdnn/decode.py
--- a/.github/workflows/run-gigaspeech-2022-05-13.yml
+++ b/.github/workflows/run-gigaspeech-2022-05-13.yml
@ -0,0 +1,128 @@
 # Copyright      2021  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-gigaspeech-2022-05-13
 # stateless transducer + k2 pruned rnnt-loss + reworked conformer
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
  workflow_dispatch:
 concurrency:
  group: run_gigaspeech_2022_05_13-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_gigaspeech_2022_05_13:
    if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      # Check out the repository with full history.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Install the Python version from the matrix and enable pip caching keyed
      # on the requirements-ci.txt files.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Install every non-comment line of requirements-ci.txt, one pip invocation
      # per line, then reinstall protobuf 3.20.* from source.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # Restore ~/tmp/kaldifeat from the cache. actions/cache v1/v2 have been
      # retired by GitHub, so v4 is used; `path` and `key` are unchanged.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Download GigaSpeech dev/test dataset
        shell: bash
        run: |
          sudo apt-get install -y -q git-lfs
          .github/scripts/download-gigaspeech-dev-test-dataset.sh
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          ln -s ~/tmp/giga-dev-dataset-fbank/data egs/gigaspeech/ASR/
          ls -lh egs/gigaspeech/ASR/data/fbank
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
      # Print the directory tree of the decoding output and the lines matching
      # "best for dev" / "best for test" in the greedy-search logs.
      # Runs for scheduled and manually dispatched runs, and for the
      # 'run-decode' label.
      - name: Display decoding results for gigaspeech pruned_transducer_stateless2
        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
        shell: bash
        run: |
          # Install `tree` before its first use below; previously it was
          # installed only after being called.
          sudo apt-get -qq install tree
          cd egs/gigaspeech/ASR/
          tree ./pruned_transducer_stateless2/exp
          cd pruned_transducer_stateless2
          echo "results for pruned_transducer_stateless2"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
      - name: Upload decoding results for gigaspeech pruned_transducer_stateless2
        uses: actions/upload-artifact@v4
        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
          path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
--- a/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml
+++ b/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml
@ -0,0 +1,136 @@
 # Copyright      2022  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-gigaspeech-zipformer-2023-10-17
 # zipformer
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
  workflow_dispatch:
 concurrency:
  group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_gigaspeech_2023_10_17_zipformer:
    if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      # Check out the repository with full history.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Install the Python version from the matrix and enable pip caching keyed
      # on the requirements-ci.txt files.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Install every non-comment line of requirements-ci.txt, one pip invocation
      # per line, then reinstall protobuf 3.20.* from source.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # Restore ~/tmp/kaldifeat from the cache. actions/cache v1/v2 have been
      # retired by GitHub, so v4 is used; `path` and `key` are unchanged.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
      - name: upload model to https://github.com/k2-fsa/sherpa-onnx
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
      # Print the directory tree of zipformer/exp and the matching result lines
      # from the greedy-search logs. Runs for scheduled and manually dispatched
      # runs, and for the 'run-decode' label.
      # NOTE(review): the grep patterns look for "test-clean" / "test-other";
      # confirm that the GigaSpeech decoding logs actually use these set names.
      - name: Display decoding results for gigaspeech zipformer
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
        shell: bash
        run: |
          cd egs/gigaspeech/ASR/
          tree ./zipformer/exp
          cd zipformer
          echo "results for zipformer"
          echo "===greedy search==="
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          # echo "===fast_beam_search==="
          # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          #
          # echo "===modified beam search==="
          # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      - name: Upload decoding results for gigaspeech zipformer
        uses: actions/upload-artifact@v4
        if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
          path: egs/gigaspeech/ASR/zipformer/exp/
--- a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
+++ b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
@ -0,0 +1,165 @@
 name: run-librispeech-lstm-transducer2-2022-09-03
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
  workflow_dispatch:
 concurrency:
  group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_librispeech_lstm_transducer_stateless2_2022_09_03:
    if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      # Check out the repository with full history.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Install the Python version from the matrix and enable pip caching keyed
      # on the requirements-ci.txt files.
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      # Install every non-comment line of requirements-ci.txt, one pip invocation
      # per line, then reinstall protobuf 3.20.* from source.
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      # Restore ~/tmp/kaldifeat from the cache. actions/cache v1/v2 have been
      # retired by GitHub, so v4 is used; `path` and `key` are unchanged.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      # Restore ~/tmp/download from the cache; the download step that follows is
      # skipped on a cache hit. actions/cache v1/v2 have been retired by GitHub,
      # so v4 is used; `path` and `key` are unchanged.
      - name: Cache LibriSpeech test-clean and test-other datasets
        id: libri-test-clean-and-test-other-data
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/download
          key: cache-libri-test-clean-and-test-other
      - name: Download LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
      - name: Prepare manifests for LibriSpeech test-clean and test-other
        shell: bash
        run: |
          .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
      # Restore ~/tmp/fbank-libri from the cache; the fbank computation step that
      # follows is skipped on a cache hit. actions/cache v1/v2 have been retired
      # by GitHub, so v4 is used; `path` and `key` are unchanged.
      - name: Cache LibriSpeech test-clean and test-other fbank features
        id: libri-test-clean-and-test-other-fbank
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/fbank-libri
          key: cache-libri-fbank-test-clean-and-test-other-v2
      - name: Compute fbank for LibriSpeech test-clean and test-other
        if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          mkdir -p egs/librispeech/ASR/data
          ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
          ls -lh egs/librispeech/ASR/data/*
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
      - name: Display decoding results for lstm_transducer_stateless2
        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
        shell: bash
        run: |
          cd egs/librispeech/ASR
          tree lstm_transducer_stateless2/exp
          cd lstm_transducer_stateless2/exp
          echo "===greedy search==="
          find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          echo "===fast_beam_search==="
          find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
          # echo "===modified beam search==="
          # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      - name: Display decoding results for lstm_transducer_stateless2
        if: github.event.label.name == 'shallow-fusion'
        shell: bash
        run: |
          cd egs/librispeech/ASR
          tree lstm_transducer_stateless2/exp
          cd lstm_transducer_stateless2/exp
          echo "===modified_beam_search_lm_shallow_fusion==="
          echo "===Using RNNLM==="
          find modified_beam_search_lm_shallow_fusion  -name "log-*rnn*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find modified_beam_search_lm_shallow_fusion  -name "log-*rnn*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      - name: Display decoding results for lstm_transducer_stateless2
        if: github.event.label.name == 'LODR'
        shell: bash
        run: |
          cd egs/librispeech/ASR
          tree lstm_transducer_stateless2/exp
          cd lstm_transducer_stateless2/exp
          echo "===modified_beam_search_rnnlm_LODR==="
          find modified_beam_search_LODR  -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
          find modified_beam_search_LODR  -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
      - name: Upload decoding results for lstm_transducer_stateless2
        uses: actions/upload-artifact@v4
        if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
        with:
          name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
          path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
--- a/.github/workflows/run-multi-corpora-zipformer.yml
+++ b/.github/workflows/run-multi-corpora-zipformer.yml
@ -0,0 +1,86 @@
 # Copyright      2023   Xiaomi Corp.    (author: Zengrui Jin)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-multi-corpora-zipformer
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  workflow_dispatch:
 concurrency:
  group: run_multi-corpora_zipformer-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_multi-corpora_zipformer:
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'multi-zh_hans' || github.event.label.name == 'zipformer' || github.event.label.name == 'multi-corpora'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-multi-corpora-zipformer.sh
--- a/.github/workflows/run-ptb-rnn-lm.yml
+++ b/.github/workflows/run-ptb-rnn-lm.yml
@ -0,0 +1,73 @@
 name: run-ptb-rnn-lm-training
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
  workflow_dispatch:
 concurrency:
  group: run_ptb_rnn_lm_training-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_ptb_rnn_lm_training:
    if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: ["3.8"]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | grep -v kaldifst | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Prepare data
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          cd egs/ptb/LM
          ./prepare.sh
      - name: Run training
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          cd egs/ptb/LM
          ./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2
      - name: Upload pretrained models
        uses: actions/upload-artifact@v4
        if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
        with:
          name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
          path: egs/ptb/LM/my-rnnlm-exp/
--- a/.github/workflows/run-swbd-conformer-ctc.yml
+++ b/.github/workflows/run-swbd-conformer-ctc.yml
@ -0,0 +1,86 @@
 # Copyright      2023   Xiaomi Corp.    (author: Zengrui Jin)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-swbd-conformer_ctc
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  workflow_dispatch:
 concurrency:
  group: run-swbd-conformer_ctc-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run-swbd-conformer_ctc:
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'swbd'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-swbd-conformer-ctc-2023-08-26.sh
--- a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml
+++ b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml
@ -0,0 +1,86 @@
 # Copyright      2021  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-wenetspeech-pruned-transducer-stateless2
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  workflow_dispatch:
 concurrency:
  group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  run_wenetspeech_pruned_transducer_stateless2:
    if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'wenetspeech'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Inference with pre-trained model
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          sudo apt-get -qq install git-lfs tree
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh
--- a/.github/workflows/run-yesno-recipe.yml
+++ b/.github/workflows/run-yesno-recipe.yml
@ -1,78 +0,0 @@
 # Copyright      2021  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: run-yesno-recipe
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
 jobs:
  run-yesno-recipe:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # os: [ubuntu-18.04, macos-10.15]
        # TODO: enable macOS for CPU testing
        os: [ubuntu-18.04]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install libnsdfile and libsox
        if: startsWith(matrix.os, 'ubuntu')
        run: |
          sudo apt update
          sudo apt install -q -y libsndfile1-dev libsndfile1 ffmpeg
          sudo apt install -q -y --fix-missing sox libsox-dev libsox-fmt-all
      - name: Install Python dependencies
        run: |
          python3 -m pip install --upgrade pip black flake8
          python3 -m pip install -U pip
          python3 -m pip install k2==1.4.dev20210822+cpu.torch1.7.1 -f https://k2-fsa.org/nightly/
          python3 -m pip install torchaudio==0.7.2
          python3 -m pip install git+https://github.com/lhotse-speech/lhotse
          # We are in ./icefall and there is a file: requirements.txt in it
          python3 -m pip install -r requirements.txt
      - name: Run yesno recipe
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          echo $PYTHONPATH
          cd egs/yesno/ASR
          ./prepare.sh
          python3 ./tdnn/train.py
          python3 ./tdnn/decode.py
          # TODO: Check that the WER is less than some value
--- a/.github/workflows/style_check.yml
+++ b/.github/workflows/style_check.yml
@ -24,13 +24,19 @@ on:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: style_check-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  style_check:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
-        os: [ubuntu-18.04, macos-10.15]
+        os: [ubuntu-latest]
-        python-version: [3.7, 3.9]
+        python-version: ["3.10"]
      fail-fast: false
    steps:
@ -45,18 +51,27 @@ jobs:
      - name: Install Python dependencies
        run: |
-          python3 -m pip install --upgrade pip black==21.6b0 flake8==3.9.2
+          python3 -m pip install --upgrade pip black==22.3.0 flake8==5.0.4 click==8.1.0 isort==5.10.1
          # Click issue fixed in https://github.com/psf/black/pull/2966
      - name: Run flake8
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --show-source --statistics
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          flake8 .
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 \
            --statistics --extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503
      - name: Run black
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          black --check --diff .
      - name: Run isort
        shell: bash
        working-directory: ${{github.workspace}}
        run: |
          isort --check --diff .
--- a/.github/workflows/test-ncnn-export.yml
+++ b/.github/workflows/test-ncnn-export.yml
@ -0,0 +1,77 @@
 name: test-ncnn-export
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
  workflow_dispatch:
 concurrency:
  group: test_ncnn_export-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  test_ncnn_export:
    if: github.event.label.name == 'ready' || github.event.label.name == 'ncnn' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Test ncnn export
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/test-ncnn-export.sh
--- a/.github/workflows/test-onnx-export.yml
+++ b/.github/workflows/test-onnx-export.yml
@ -0,0 +1,77 @@
 name: test-onnx-export
 on:
  push:
    branches:
      - master
  pull_request:
    types: [labeled]
  schedule:
    # minute (0-59)
    # hour (0-23)
    # day of the month (1-31)
    # month (1-12)
    # day of the week (0-6)
    # nightly build at 15:50 UTC time every day
    - cron: "50 15 * * *"
  workflow_dispatch:
 concurrency:
  group: test_onnx_export-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  test_onnx_export:
    if: github.event.label.name == 'ready' || github.event.label.name == 'onnx' || github.event_name == 'push' || github.event_name == 'schedule'
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python-version: [3.8]
      fail-fast: false
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'
      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt  | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v2
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22
      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh
      - name: Test ONNX export
        shell: bash
        env:
          GITHUB_EVENT_NAME: ${{ github.event_name }}
          GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/test-onnx-export.sh
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -1,71 +1,111 @@
 # Copyright      2021  Fangjun Kuang (csukuangfj@gmail.com)
 # See ../../LICENSE for clarification regarding multiple authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 name: test
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: test-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          echo "::set-output name=matrix::${MATRIX}"
  test:
-    runs-on: ${{ matrix.os }}
+    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        os: [ubuntu-18.04, macos-10.15]
        python-version: [3.6, 3.7, 3.8, 3.9]
        torch: ["1.8.1"]
        k2-version: ["1.4.dev20210822"]
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
-      - name: Setup Python ${{ matrix.python-version }}
+      - name: Free space
-        uses: actions/setup-python@v1
+        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"
      - name: Run tests
        uses: addnab/docker-run-action@v3
        with:
-          python-version: ${{ matrix.python-version }}
+            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
-      - name: Install Python dependencies
+              pytest -v -s ./test
        run: |
          python3 -m pip install --upgrade pip pytest
          pip install k2==${{ matrix.k2-version }}+cpu.torch${{ matrix.torch }} -f https://k2-fsa.org/nightly/
          # icefall requirements
          pip install -r requirements.txt
-      - name: Run tests
+              # run tests for conformer ctc
-        if: startsWith(matrix.os, 'ubuntu')
+              cd egs/librispeech/ASR/conformer_ctc
-        run: |
+              pytest -v -s
          ls -lh
          export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH
          echo $PYTHONPATH
          pytest ./test
-      - name: Run tests
+              cd ../pruned_transducer_stateless
-        if: startsWith(matrix.os, 'macos')
+              pytest -v -s
-        run: |
+
-          ls -lh
+              cd ../pruned_transducer_stateless2
-          export PYTHONPATH=$PWD:$PWD/lhotse:$PYTHONPATH
+              pytest -v -s
-          lib_path=$(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")
+
-          echo "lib_path: $lib_path"
+              cd ../pruned_transducer_stateless3
-          export DYLD_LIBRARY_PATH=$lib_path:$DYLD_LIBRARY_PATH
+              pytest -v -s
-          pytest ./test
+
              cd ../pruned_transducer_stateless4
              pytest -v -s
              echo $PYTHONPATH
              cd ../pruned_transducer_stateless7
              pytest -v -s
              cd ../transducer_stateless
              pytest -v -s
              # cd ../transducer
              # pytest -v -s
              cd ../transducer_stateless2
              pytest -v -s
              cd ../transducer_lstm
              pytest -v -s
              cd ../zipformer
              pytest -v -s
      - uses: actions/upload-artifact@v4
        with:
          path: egs/librispeech/ASR/zipformer/swoosh.pdf
          name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }}
--- a/.github/workflows/yesno.yml
+++ b/.github/workflows/yesno.yml
@ -0,0 +1,67 @@
 name: yesno
 on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch:
 concurrency:
  group: yesno-${{ github.ref }}
  cancel-in-progress: true
 jobs:
  generate_build_matrix:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
          # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.5.0")
          echo "::set-output name=matrix::${MATRIX}"
  yesno:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Run the yesno recipe
        uses: addnab/docker-run-action@v3
        with:
            image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
            options: |
              --volume ${{ github.workspace }}/:/icefall
            shell: bash
            run: |
              export PYTHONPATH=/icefall:$PYTHONPATH
              cd /icefall
              git config --global --add safe.directory /icefall
              python3 -m torch.utils.collect_env
              python3 -m k2.version
              pip list
              .github/scripts/yesno/ASR/run.sh
--- a/.gitignore
+++ b/.gitignore
@ -1,7 +1,38 @@
 icefall.egg-info/
 data
 __pycache__
 path.sh
 exp
 exp*/
 *.pt
-download/
+download
 dask-worker-space
 log
 *.bak
 *-bak
 *bak.py
 # Ignore Mac system files
 .DS_store
 # Ignore node_modules folder
 node_modules
 # ignore .nfs
 .nfs*
 # Ignore all text files
 *.txt
 # Ignore files related to API keys
 .env
 # Ignore SASS config files
 .sass-cache
 *.param
 *.bin
 .DS_Store
 *.fst
 *.arpa
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,24 +1,38 @@
 repos:
  - repo: https://github.com/psf/black
-    rev: 21.6b0
+    rev: 22.3.0
    hooks:
      - id: black
-        args: [--line-length=80]
+        args: ["--line-length=88"]
        additional_dependencies: ['click==8.1.0']
        exclude: icefall\/__init__\.py
  - repo: https://github.com/PyCQA/flake8
-    rev: 3.9.2
+    rev: 5.0.4
    hooks:
      - id: flake8
-        args: [--max-line-length=80]
+        args: ["--max-line-length=88", "--extend-ignore=E203,E266,E501,F401,E402,F403,F841,W503"]
      # What are we ignoring here?
      # E203: whitespace before ':'
      # E266: too many leading '#' for block comment
      # E501: line too long
      # F401: module imported but unused
      # E402: module level import not at top of file
      # F403: 'from module import *' used; unable to detect undefined names
      # F841: local variable is assigned to but never used
      # W503: line break before binary operator
      # In addition, the default ignore list is:
      # E121,E123,E126,E226,E24,E704,W503,W504
  - repo: https://github.com/pycqa/isort
-    rev: 5.9.2
+    rev: 5.12.0
    hooks:
      - id: isort
-        args: [--profile=black, --line-length=80]
+        args: ["--profile=black"]
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
+    rev: v4.2.0
    hooks:
      - id: check-executables-have-shebangs
      - id: end-of-file-fixer
--- a/9
+++ b/9
@ -1,13 +1,4 @@
                                 Legal Notices
   NOTE (this is not from the Apache License): The copyright model is that
   authors (or their employers, if noted in individual files) own their
   individual contributions. The authors' contributions can be discerned
   from the git history.
 -------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
--- a/README.md
+++ b/README.md
@ -2,22 +2,85 @@
 <img src="https://raw.githubusercontent.com/k2-fsa/icefall/master/docs/source/_static/logo.png" width=168>
 </div>
-## Installation
+# Introduction
-Please refer to <https://icefall.readthedocs.io/en/latest/installation/index.html>
+The icefall project contains speech-related recipes for various datasets
 using [k2-fsa](https://github.com/k2-fsa/k2) and [lhotse](https://github.com/lhotse-speech/lhotse).
 You can use [sherpa](https://github.com/k2-fsa/sherpa), [sherpa-ncnn](https://github.com/k2-fsa/sherpa-ncnn) or [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) for deployment with models
 in icefall; these frameworks also support models not included in icefall. Please refer to their respective documentation for more details.
 You can try pre-trained models from within your browser without the need
 to download or install anything by visiting this [huggingface space](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition).
 Please refer to [document](https://k2-fsa.github.io/icefall/huggingface/spaces.html) for more details.
 # Installation
 Please refer to [document](https://k2-fsa.github.io/icefall/installation/index.html)
 for installation.
-## Recipes
+# Recipes
-Please refer to <https://icefall.readthedocs.io/en/latest/recipes/index.html>
+Please refer to [document](https://k2-fsa.github.io/icefall/recipes/index.html)
-for more information.
+for more details.
-We provide two recipes at present:
+## ASR: Automatic Speech Recognition
 ### Supported Datasets
  - [yesno][yesno]
  - [LibriSpeech][librispeech]
-### yesno
+  - [Aidatatang_200zh][aidatatang_200zh]
  - [Aishell][aishell]
  - [Aishell2][aishell2]
  - [Aishell4][aishell4]
  - [Alimeeting][alimeeting]
  - [AMI][ami]
  - [CommonVoice][commonvoice]
  - [Corpus of Spontaneous Japanese][csj]
  - [GigaSpeech][gigaspeech]
  - [LibriCSS][libricss]
  - [LibriSpeech][librispeech]
  - [Libriheavy][libriheavy]
  - [Multi-Dialect Broadcast News Arabic Speech Recognition][mgb2]
  - [SPGISpeech][spgispeech]
  - [Switchboard][swbd]
  - [TIMIT][timit]
  - [TED-LIUM3][tedlium3]
  - [TAL_CSASR][tal_csasr]
  - [Voxpopuli][voxpopuli]
  - [XBMU-AMDO31][xbmu-amdo31]
  - [WenetSpeech][wenetspeech]
 More datasets will be added in the future.
 ### Supported Models
 The [LibriSpeech][librispeech] recipe supports the most comprehensive set of models; you are welcome to try them out.
 #### CTC 
  - TDNN LSTM CTC
  - Conformer CTC
  - Zipformer CTC
 #### MMI
  - Conformer MMI
  - Zipformer MMI
 #### Transducer
  - Conformer-based Encoder
  - LSTM-based Encoder
  - Zipformer-based Encoder
  - LSTM-based Predictor
  - [Stateless Predictor](https://research.google/pubs/rnn-transducer-with-stateless-prediction-network/)
 #### Whisper
  - [OpenAI Whisper](https://arxiv.org/abs/2212.04356) (We support fine-tuning on AiShell-1.)
 If you are willing to contribute to icefall, please refer to [contributing](https://k2-fsa.github.io/icefall/contributing/index.html) for more details.
 We would like to highlight the performance of some of the recipes here.
 ### [yesno][yesno]
 This is the simplest ASR recipe in `icefall` and can be run on CPU.
 Training takes less than 30 seconds and gives you the following WER:
@ -25,37 +88,302 @@ Training takes less than 30 seconds and gives you the following WER:
 ```
 [test_set] %WER 0.42% [1 / 240, 0 ins, 1 del, 0 sub ]
 ```
-We do provide a Colab notebook for this recipe.
+We provide a Colab notebook for this recipe: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing)
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tIjjzaJc3IvGyKiMCDWO-TSnBgkcuN3B?usp=sharing)
-### LibriSpeech
+### [LibriSpeech][librispeech]
-We provide two models for this recipe: [conformer CTC model][LibriSpeech_conformer_ctc]
+Please see [RESULTS.md](https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/RESULTS.md)
-and [TDNN LSTM CTC model][LibriSpeech_tdnn_lstm_ctc].
+for the **latest** results.
-#### Conformer CTC Model
+#### [Conformer CTC](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/conformer_ctc)
-The best WER we currently have is:
+|     | test-clean | test-other |
 |-----|------------|------------|
 | WER | 2.42       | 5.73       |
 ||test-clean|test-other|
 |--|--|--|
 |WER| 2.57% | 5.94% |
-We provide a Colab notebook to run a pre-trained conformer CTC model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing)
+We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1huyupXAcHsUrKaWfI83iMEJ6J0Nh0213?usp=sharing)
-#### TDNN LSTM CTC Model
+#### [TDNN LSTM CTC](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/tdnn_lstm_ctc)
-The WER for this model is:
+|     | test-clean | test-other |
 |-----|------------|------------|
 | WER | 6.59       | 17.69      |
-||test-clean|test-other|
+We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1-iSfQMp2So-We_Uu49N4AAcMInB72u9z?usp=sharing)
-|--|--|--|
+
-|WER| 6.59% | 17.69% |
+
 #### [Transducer (Conformer Encoder + LSTM Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/transducer)
 |               | test-clean | test-other |
 |---------------|------------|------------|
 | greedy_search | 3.07       | 7.51       |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1_u6yK9jDkPwG_NLrZMN2XK7Aeq4suMO2?usp=sharing)
 #### [Transducer (Conformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/transducer)
 |                                       | test-clean | test-other |
 |---------------------------------------|------------|------------|
 | modified_beam_search (`beam_size=4`) | 2.56       | 6.27       |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CO1bXJ-2khDckZIW8zjOPHGSKLHpTDlp?usp=sharing)
 #### [Transducer (Zipformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/zipformer)
 WER (modified_beam_search `beam_size=4` unless further stated) 
 1. LibriSpeech-960hr
 | Encoder         | Params | test-clean | test-other | epochs  | devices    |
 |-----------------|--------|------------|------------|---------|------------|
 | Zipformer       | 65.5M  | 2.21       | 4.79       | 50      | 4 32G-V100 |
 | Zipformer-small | 23.2M  | 2.42       | 5.73       | 50      | 2 32G-V100 |
 | Zipformer-large | 148.4M | 2.06       | 4.63       | 50      | 4 32G-V100 |
 | Zipformer-large | 148.4M | 2.00       | 4.38       | 174     | 8 80G-A100 |
 2. LibriSpeech-960hr + GigaSpeech
 | Encoder         | Params | test-clean | test-other |
 |-----------------|--------|------------|------------|
 | Zipformer       | 65.5M   | 1.78       | 4.08       |
 3. LibriSpeech-960hr + GigaSpeech + CommonVoice
 | Encoder         | Params | test-clean | test-other |
 |-----------------|--------|------------|------------|
 | Zipformer       | 65.5M   | 1.90       | 3.98       |
 ### [GigaSpeech][gigaspeech]
 #### [Conformer CTC](https://github.com/k2-fsa/icefall/tree/master/egs/gigaspeech/ASR/conformer_ctc)
 |     |  Dev  | Test  |
 |-----|-------|-------|
 | WER | 10.47 | 10.58 |
 #### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/gigaspeech/ASR/pruned_transducer_stateless2)
 Conformer Encoder + Stateless Predictor + k2 Pruned RNN-T Loss
 |                      |  Dev  | Test  |
 |----------------------|-------|-------|
 |    greedy_search     | 10.51 | 10.73 |
 |   fast_beam_search   | 10.50 | 10.69 |
 | modified_beam_search | 10.40 | 10.51 |
 #### [Transducer (Zipformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/gigaspeech/ASR/zipformer)
 |                      |  Dev  | Test  |
 |----------------------|-------|-------|
 |    greedy_search     | 10.31 | 10.50 |
 |   fast_beam_search   | 10.26 | 10.48 |
 | modified_beam_search | 10.25 | 10.38 |
 ### [Aishell][aishell]
 #### [TDNN LSTM CTC](https://github.com/k2-fsa/icefall/tree/master/egs/aishell/ASR/tdnn_lstm_ctc)
 |     | test  |
 |-----|-------|
 | CER | 10.16 |
 We provide a Colab notebook to test the pre-trained model:  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1jbyzYq3ytm6j2nlEt-diQm-6QVWyDDEa?usp=sharing)
 #### [Transducer (Conformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/aishell/ASR/transducer_stateless)
 |     | test |
 |-----|------|
 | CER | 4.38 |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/14XaT2MhnBkK-3_RqqWq3K90Xlbin-GZC?usp=sharing)
 #### [Transducer (Zipformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/aishell/ASR/zipformer)
 CER (modified_beam_search `beam_size=4`)
 | Encoder         | Params | dev | test | epochs  |
 |-----------------|--------|-----|------|---------|
 | Zipformer       | 73.4M  | 4.13| 4.40 | 55      |
 | Zipformer-small | 30.2M  | 4.40| 4.67 | 55      |
 | Zipformer-large | 157.3M | 4.03| 4.28 | 56      |
 ### [Aishell4][aishell4]
 #### [Transducer (pruned_transducer_stateless5)](https://github.com/k2-fsa/icefall/tree/master/egs/aishell4/ASR/pruned_transducer_stateless5)
 1 Trained with all subsets: 
 |     |   test     |
 |-----|------------|
 | CER |   29.08    |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
 ### [TIMIT][timit]
 #### [TDNN LSTM CTC](https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_lstm_ctc)
 |   |TEST|
 |---|----|
 |PER| 19.71% |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Hs9DA4V96uapw_30uNp32OMJgkuR5VVd?usp=sharing)
 #### [TDNN LiGRU CTC](https://github.com/k2-fsa/icefall/tree/master/egs/timit/ASR/tdnn_ligru_ctc)
 |   |TEST|
 |---|----|
 |PER| 17.66% |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1z3lkURVv9M7uTiIgf3Np9IntMHEknaks?usp=sharing)
 ### [TED-LIUM3][tedlium3]
 #### [Transducer (Conformer Encoder + Stateless Predictor)](https://github.com/k2-fsa/icefall/tree/master/egs/tedlium3/ASR/transducer_stateless)
 |                                      |  dev  |  test  |
 |--------------------------------------|-------|--------|
 | modified_beam_search (`beam_size=4`) |  6.91 |  6.33  |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MmY5bBxwvKLNT4A2DJnwiqRXhdchUqPN?usp=sharing)
 #### [Transducer (pruned_transducer_stateless)](https://github.com/k2-fsa/icefall/tree/master/egs/tedlium3/ASR/pruned_transducer_stateless)
 |                                      |  dev  |  test  |
 |--------------------------------------|-------|--------|
 | modified_beam_search (`beam_size=4`) |  6.77 |  6.14  |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1je_1zGrOkGVVd4WLzgkXRHxl-I27yWtz?usp=sharing)
 ### [Aidatatang_200zh][aidatatang_200zh]
 #### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/aidatatang_200zh/ASR/pruned_transducer_stateless2)
 |                      |  Dev  | Test  |
 |----------------------|-------|-------|
 |    greedy_search     | 5.53  | 6.59  |
 |   fast_beam_search   | 5.30  | 6.34  |
 | modified_beam_search | 5.27  | 6.33  |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1wNSnSj3T5oOctbh5IGCa393gKOoQw2GH?usp=sharing)
 ### [WenetSpeech][wenetspeech]
 #### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/wenetspeech/ASR/pruned_transducer_stateless2)
 |                      |  Dev  | Test-Net | Test-Meeting |
 |----------------------|-------|----------|--------------|
 |    greedy_search     | 7.80  |  8.75    |  13.49       |
 |   fast_beam_search   | 7.94  |  8.74    |  13.80       |
 | modified_beam_search | 7.76  |  8.71    |  13.41       |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1EV4e1CHa1GZgEF-bZgizqI9RyFFehIiN?usp=sharing)
 #### [Transducer **Streaming** (pruned_transducer_stateless5) ](https://github.com/k2-fsa/icefall/tree/master/egs/wenetspeech/ASR/pruned_transducer_stateless5)
 |                      |  Dev  | Test-Net | Test-Meeting |
 |----------------------|-------|----------|--------------|
 | greedy_search | 8.78 | 10.12 | 16.16 |
 | fast_beam_search| 9.01 | 10.47 | 16.28 |
 | modified_beam_search | 8.53| 9.95 | 15.81 |
 ### [Alimeeting][alimeeting]
 #### [Transducer (pruned_transducer_stateless2)](https://github.com/k2-fsa/icefall/tree/master/egs/alimeeting/ASR/pruned_transducer_stateless2)
 |                      |  Eval  | Test-Net |
 |----------------------|--------|----------|
 |    greedy_search     | 31.77  |  34.66   |
 |   fast_beam_search   | 31.39  |  33.02   |
 | modified_beam_search | 30.38  |  34.25   |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tKr3f0mL17uO_ljdHGKtR7HOmthYHwJG?usp=sharing)
 ### [TAL_CSASR][tal_csasr]
 #### [Transducer (pruned_transducer_stateless5)](https://github.com/k2-fsa/icefall/tree/master/egs/tal_csasr/ASR/pruned_transducer_stateless5)
 The best results for Chinese CER(%) and English WER(%) respectively (zh: Chinese, en: English):
 |decoding-method | dev | dev_zh | dev_en | test | test_zh | test_en |
 |--|--|--|--|--|--|--|
 |greedy_search| 7.30 | 6.48 | 19.19 |7.39| 6.66 | 19.13|
 |fast_beam_search| 7.18 | 6.39| 18.90 |  7.27| 6.55 | 18.77|
 |modified_beam_search| 7.15 | 6.35 | 18.95 | 7.22| 6.50 | 18.70 |
 We provide a Colab notebook to test the pre-trained model: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1DmIx-NloI1CMU5GdZrlse7TRu4y3Dpf8?usp=sharing)
 ## TTS: Text-to-Speech
 ### Supported Datasets
  - [LJSpeech][ljspeech]
  - [VCTK][vctk]
  - [LibriTTS][libritts_tts]
 ### Supported Models
  - [VITS](https://arxiv.org/abs/2106.06103)
 # Deployment with C++
 Once you have trained a model in icefall, you may want to deploy it with C++ without Python dependencies.
 Please refer to
  - https://k2-fsa.github.io/icefall/model-export/export-with-torch-jit-script.html
  - https://k2-fsa.github.io/icefall/model-export/export-onnx.html
  - https://k2-fsa.github.io/icefall/model-export/export-ncnn.html
 for how to do this.
 We also provide a Colab notebook, showing you how to run a torch scripted model in [k2][k2] with C++.
 Please see: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1BIGLWzS36isskMXHKcqC9ysN6pspYXs_?usp=sharing)
 We provide a Colab notebook to run a pre-trained TDNN LSTM CTC model:  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1kNmDXNMwREi0rZGAOIAOJo93REBuOTcd?usp=sharing)
 [LibriSpeech_tdnn_lstm_ctc]: egs/librispeech/ASR/tdnn_lstm_ctc
 [LibriSpeech_conformer_ctc]: egs/librispeech/ASR/conformer_ctc
 [yesno]: egs/yesno/ASR
 [librispeech]: egs/librispeech/ASR
 [aishell]: egs/aishell/ASR
 [aishell2]: egs/aishell2/ASR
 [aishell4]: egs/aishell4/ASR
 [timit]: egs/timit/ASR
 [tedlium3]: egs/tedlium3/ASR
 [gigaspeech]: egs/gigaspeech/ASR
 [aidatatang_200zh]: egs/aidatatang_200zh/ASR
 [wenetspeech]: egs/wenetspeech/ASR
 [alimeeting]: egs/alimeeting/ASR
 [tal_csasr]: egs/tal_csasr/ASR
 [ami]: egs/ami
 [swbd]: egs/swbd/ASR
 [k2]: https://github.com/k2-fsa/k2
 [commonvoice]: egs/commonvoice/ASR
 [csj]: egs/csj/ASR
 [libricss]: egs/libricss/SURT
 [libritts_asr]: egs/libritts/ASR
 [libriheavy]: egs/libriheavy/ASR
 [mgb2]: egs/mgb2/ASR
 [spgispeech]: egs/spgispeech/ASR
 [voxpopuli]: egs/voxpopuli/ASR
 [xbmu-amdo31]: egs/xbmu-amdo31/ASR
 [vctk]: egs/vctk/TTS
 [ljspeech]: egs/ljspeech/TTS
 [libritts_tts]: egs/libritts/TTS
 ## Acknowledgements
 Some contributors to this project were supported by Xiaomi Corporation. Others were supported by National Science Foundation CCRI award 2120435.  This is not an exhaustive list of sources of support.
--- a/contributing.md
+++ b/contributing.md
@ -1,39 +1,37 @@
 # Contributing to Our Project
-## Pre-commit hooks
+Thank you for your interest in contributing to our project! We use Git pre-commit hooks to ensure code quality and consistency. Before contributing, please follow these guidelines to enable and use the pre-commit hooks.
-We use [git][git] [pre-commit][pre-commit] [hooks][hooks] to check that files
+## Pre-Commit Hooks
 going to be committed:
-  - contain no trailing spaces
+We have set up pre-commit hooks to check that the files you're committing meet our coding and formatting standards. These checks include:
  - are formatted with [black][black]
  - are compatible with [PEP8][PEP8] (checked by [flake8][flake8])
  - end in a newline and only a newline
  - contain sorted `imports` (checked by [isort][isort])
-These hooks are disabled by default. Please use the following commands to enable them:
+- Ensuring there are no trailing spaces.
 - Formatting code with [black](https://github.com/psf/black).
 - Checking compliance with PEP8 using [flake8](https://flake8.pycqa.org/).
 - Verifying that files end with a newline character (and only a newline).
 - Sorting imports using [isort](https://pycqa.github.io/isort/).
-```bash
+Please note that these hooks are disabled by default. To enable them, follow these steps:
 pip install pre-commit  # run it only once
 pre-commit install      # run it only once, it will install all hooks
-# modify some files
+### Installation (Run only once)
 git add <some files>
 git commit              # It runs all hooks automatically.
-# If all hooks run successfully, you can write the commit message now. Done!
+1. Install the `pre-commit` package using pip:
-#
+   ```bash
-# If any hook failed, your commit was not successful.
+   pip install pre-commit
-# Please read the error messages and make changes accordingly.
+   ```
-# And rerun
+1. Install the Git hooks using:
   ```bash
   pre-commit install
   ```
 ### Making a Commit
 Once you have enabled the pre-commit hooks, follow these steps when making a commit:
 1. Make your changes to the codebase.
 2. Stage your changes by using git add for the files you modified.
 3. Commit your changes using git commit. The pre-commit hooks will run automatically at this point.
 4. If all hooks run successfully, you can write your commit message, and your changes will be successfully committed.
 5. If any hook fails, your commit will not be successful. Please read and follow the error messages provided, make the necessary changes, and then re-run git add and git commit.
-git add <some files>
+### Your Contribution
-git commit
+Your contributions are valuable to us, and by following these guidelines, you help maintain code consistency and quality in our project. We appreciate your dedication to ensuring high-quality code. If you have questions or need assistance, feel free to reach out to us. Thank you for being part of our open-source community!
 ```
 [git]: https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks
 [flake8]: https://github.com/PyCQA/flake8
 [PEP8]: https://www.python.org/dev/peps/pep-0008/
 [black]: https://github.com/psf/black
 [hooks]: https://github.com/pre-commit/pre-commit-hooks
 [pre-commit]: https://github.com/pre-commit/pre-commit
 [isort]: https://github.com/PyCQA/isort
--- a/docker/README.md
+++ b/docker/README.md
@ -0,0 +1,129 @@
 # icefall dockerfile
 ## Download from dockerhub
 You can find pre-built docker images for icefall at the following address:
  <https://hub.docker.com/r/k2fsa/icefall/tags>
 Example usage:
 ```bash
 docker run --gpus all --rm -it  k2fsa/icefall:torch1.13.0-cuda11.6 /bin/bash
 ```
 ## Build from dockerfile
 Two sets of configurations are provided: (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8, and (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
 If your NVIDIA driver supports CUDA Version: 11.3, please go for case (a) Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8.
 Otherwise, since the older PyTorch images are not updated with the [apt-key rotation by NVIDIA](https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key), you have to go for case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8. Ensure that your NVIDIA driver supports at least CUDA 11.0.
 You can check the highest CUDA version supported by your NVIDIA driver with the `nvidia-smi` command below. In this example, the highest supported CUDA version is 11.0, i.e. case (b) Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8.
 ```bash
 $ nvidia-smi
 Tue Sep 20 00:26:13 2022
 +-----------------------------------------------------------------------------+
 | NVIDIA-SMI 450.119.03   Driver Version: 450.119.03   CUDA Version: 11.0     |
 |-------------------------------+----------------------+----------------------+
 | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
 | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
 |                               |                      |               MIG M. |
 |===============================+======================+======================|
 |   0  TITAN RTX           On   | 00000000:03:00.0 Off |                  N/A |
 | 41%   31C    P8     4W / 280W |     16MiB / 24219MiB |      0%      Default |
 |                               |                      |                  N/A |
 +-------------------------------+----------------------+----------------------+
 |   1  TITAN RTX           On   | 00000000:04:00.0 Off |                  N/A |
 | 41%   30C    P8    11W / 280W |      6MiB / 24220MiB |      0%      Default |
 |                               |                      |                  N/A |
 +-------------------------------+----------------------+----------------------+
 +-----------------------------------------------------------------------------+
 | Processes:                                                                  |
 |  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
 |        ID   ID                                                   Usage      |
 |=============================================================================|
 |    0   N/A  N/A      2085      G   /usr/lib/xorg/Xorg                  9MiB |
 |    0   N/A  N/A      2240      G   /usr/bin/gnome-shell                4MiB |
 |    1   N/A  N/A      2085      G   /usr/lib/xorg/Xorg                  4MiB |
 +-----------------------------------------------------------------------------+
 ```
 ## Building images locally
 If your environment requires a proxy to access the Internet, remember to add that information to the Dockerfile directly.
 For most cases, you can uncomment these lines in the Dockerfile and add in your proxy details.
 ```dockerfile
 ENV http_proxy=http://aaa.bb.cc.net:8080 \
    https_proxy=http://aaa.bb.cc.net:8080
 ```
 Then, proceed with these commands.
 ### If you are in case (a), i.e. your NVIDIA driver supports CUDA version >= 11.3:
 ```bash
 cd docker/Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8
 docker build -t icefall/pytorch1.12.1 .
 ```
 ### If you are in case (b), i.e. your NVIDIA driver can only support CUDA versions 11.0 <= x < 11.3:
 ```bash
 cd docker/Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8
 docker build -t icefall/pytorch1.7.1 .
 ```
 ## Running your built local image
 Sample usage of the GPU based images. These commands are written with case (a) in mind, so please make the necessary changes to your image name if you are in case (b).
 Note: use [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) to run the GPU images.
 ```bash
 docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all icefall/pytorch1.12.1
 ```
 ### Tips:
 1. Since your data and models most probably won't be in the docker, you must use the -v flag to access the host machine. Do this by specifying `-v {/path/in/host/machine}:{/path/in/docker}`.
 2. Also, if your environment requires a proxy, this would be a good time to add it in too: `-e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080`.
 Overall, your docker run command should look like this.
 ```bash
 docker run -it --runtime=nvidia --shm-size=2gb --name=icefall --gpus all -v {/path/in/host/machine}:{/path/in/docker} -e http_proxy=http://aaa.bb.cc.net:8080 -e https_proxy=http://aaa.bb.cc.net:8080 icefall/pytorch1.12.1
 ```
 You can explore more docker run options [here](https://docs.docker.com/engine/reference/commandline/run/) to suit your environment.
 ### Linking to icefall in your host machine
 If you already have icefall downloaded onto your host machine, you can use that repository instead so that changes in your code are visible inside and outside of the container.
 Note: Remember to set the -v flag above during the first run of the container, as that is the only way for your container to access your host machine.
 Warning: Check that the icefall in your host machine is visible from within your container before proceeding to the commands below.
 Use these commands once you are inside the container.
 ```bash
 rm -r /workspace/icefall
 ln -s {/path/in/docker/to/icefall} /workspace/icefall
 ```
 ## Starting another session in the same running container.
 ```bash
 docker exec -it icefall /bin/bash
 ```
 ## Restarting a killed container that has been run before.
 ```bash
 docker start -ai icefall
 ```
 ## Sample usage of the CPU based images:
 ```bash
 docker run -it icefall /bin/bash
 ```
--- a/docker/Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8/Dockerfile
+++ b/docker/Ubuntu18.04-pytorch1.12.1-cuda11.3-cudnn8/Dockerfile
@ -0,0 +1,74 @@
 # icefall image built on the PyTorch 1.12.1 / CUDA 11.3 / cuDNN 8 devel base.
 FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
 # Uncomment and adapt the two lines below when building behind a proxy.
 # ENV http_proxy=http://aaa.bbb.cc.net:8080 \
 #     https_proxy=http://aaa.bbb.cc.net:8080
 # System packages (listed alphabetically).
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         curl \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libssl-dev \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         vim \
         wget \
         zlib1g-dev
 # CMake 3.18.0, built from source under /opt.
 # The tarball is removed by absolute path: at that point the working
 # directory is the extracted source tree, so a relative path would not
 # match the downloaded archive in /opt.
 RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
     cd /opt && \
     tar -zxvf cmake-3.18.0.tar.gz && \
     cd cmake-3.18.0 && \
     ./bootstrap && \
     make && \
     make install && \
     rm -rf /opt/cmake-3.18.0.tar.gz && \
     find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
     cd -
 # flac 1.3.2, built from source under /opt.
 # `xz -d` replaces the .tar.xz with a .tar, which is removed by absolute
 # path after installation (same reason as for CMake above).
 RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
     cd /opt && \
     xz -d flac-1.3.2.tar.xz && \
     tar -xvf flac-1.3.2.tar && \
     cd flac-1.3.2 && \
     ./configure && \
     make && make install && \
     rm -rf /opt/flac-1.3.2.tar && \
     find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
     cd -
 # torchaudio from the pytorch conda channel, plus the graphviz Python package.
 RUN conda install -y -c pytorch torchaudio=0.12 && \
     pip install graphviz
 # Clone k2 into /opt/k2 and install it from source.
 RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
     cd /opt/k2 && \
     python3 setup.py install && \
     cd -
 # Install lhotse from its GitHub repository.
 RUN pip install git+https://github.com/lhotse-speech/lhotse
 # Clone icefall into /workspace/icefall and install its Python requirements.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install -r requirements.txt
 RUN pip install kaldifeat
 # Make the icefall checkout importable and start containers inside it.
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8/Dockerfile
+++ b/docker/Ubuntu18.04-pytorch1.7.1-cuda11.0-cudnn8/Dockerfile
@ -0,0 +1,90 @@
 # icefall image built on the PyTorch 1.7.1 / CUDA 11.0 / cuDNN 8 devel base.
 FROM pytorch/pytorch:1.7.1-cuda11.0-cudnn8-devel
 # Uncomment and adapt the two lines below when building behind a proxy.
 # ENV http_proxy=http://aaa.bbb.cc.net:8080 \
 #     https_proxy=http://aaa.bbb.cc.net:8080
 # Drop the base image's NVIDIA apt sources and the 7fa2af80 key before the
 # first `apt-get update`; they are re-created with new keys further below.
 # NOTE(review): presumably required because of NVIDIA's apt key rotation - confirm.
 RUN rm /etc/apt/sources.list.d/cuda.list && \
     rm /etc/apt/sources.list.d/nvidia-ml.list && \
     apt-key del 7fa2af80
 # System packages (listed alphabetically).
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         curl \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libssl-dev \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         vim \
         wget \
         zlib1g-dev
 # Add the new NVIDIA keys, re-create the apt source lists, move several
 # CUDA libraries out of /opt/conda/lib into /opt/*.bak, then update and
 # upgrade the system packages.
 # NOTE(review): libnvrtc.so.11.0 is backed up as libnvrtc.so.11.1.bak; the
 # version mismatch in the backup name looks unintentional - confirm.
 RUN curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub | apt-key add - && \
     curl -fsSL https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
     echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
     echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
     rm -rf /var/lib/apt/lists/* && \
     mv /opt/conda/lib/libcufft.so.10 /opt/libcufft.so.10.bak && \
     mv /opt/conda/lib/libcurand.so.10 /opt/libcurand.so.10.bak && \
     mv /opt/conda/lib/libcublas.so.11 /opt/libcublas.so.11.bak && \
     mv /opt/conda/lib/libnvrtc.so.11.0 /opt/libnvrtc.so.11.1.bak && \
     # mv /opt/conda/lib/libnvToolsExt.so.1 /opt/libnvToolsExt.so.1.bak && \
     mv /opt/conda/lib/libcudart.so.11.0 /opt/libcudart.so.11.0.bak && \
     apt-get update && apt-get -y upgrade
 # CMake 3.18.0, built from source under /opt.
 # The tarball is removed by absolute path: at that point the working
 # directory is the extracted source tree, so a relative path would not
 # match the downloaded archive in /opt.
 RUN wget -P /opt https://cmake.org/files/v3.18/cmake-3.18.0.tar.gz && \
     cd /opt && \
     tar -zxvf cmake-3.18.0.tar.gz && \
     cd cmake-3.18.0 && \
     ./bootstrap && \
     make && \
     make install && \
     rm -rf /opt/cmake-3.18.0.tar.gz && \
     find /opt/cmake-3.18.0 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
     cd -
 # flac 1.3.2, built from source under /opt.
 # `xz -d` replaces the .tar.xz with a .tar, which is removed by absolute
 # path after installation (same reason as for CMake above).
 RUN wget -P /opt https://downloads.xiph.org/releases/flac/flac-1.3.2.tar.xz && \
     cd /opt && \
     xz -d flac-1.3.2.tar.xz && \
     tar -xvf flac-1.3.2.tar && \
     cd flac-1.3.2 && \
     ./configure && \
     make && make install && \
     rm -rf /opt/flac-1.3.2.tar && \
     find /opt/flac-1.3.2 -type f \( -name "*.o" -o -name "*.la" -o -name "*.a" \) -exec rm {} \; && \
     cd -
 # torchaudio from the pytorch conda channel, plus the graphviz Python package.
 RUN conda install -y -c pytorch torchaudio=0.7.1 && \
     pip install graphviz
 # Clone k2 into /opt/k2 and install it from source.
 RUN git clone https://github.com/k2-fsa/k2.git /opt/k2 && \
     cd /opt/k2 && \
     python3 setup.py install && \
     cd -
 # Install lhotse from its GitHub repository.
 RUN pip install git+https://github.com/lhotse-speech/lhotse
 # Clone icefall into /workspace/icefall and install its Python requirements.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install -r requirements.txt
 # Make the icefall checkout importable and start containers inside it.
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch1.12.1-cuda11.3.dockerfile
+++ b/docker/torch1.12.1-cuda11.3.dockerfile
@ -0,0 +1,72 @@
 # icefall image built on the PyTorch 1.12.1 / CUDA 11.3 / cuDNN 8 devel base.
 FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
 ENV LC_ALL=C.UTF-8
 ARG DEBIAN_FRONTEND=noninteractive
 # python 3.7
 # Pinned wheel versions; each can be overridden with --build-arg.
 ARG K2_VERSION="1.24.4.dev20240223+cuda11.3.torch1.12.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.3.torch1.12.1"
 ARG TORCHAUDIO_VERSION="0.12.1+cu113"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System packages (listed alphabetically); the apt lists are removed in the
 # same layer.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         curl \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libssl-dev \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         vim \
         wget \
         zlib1g-dev \
         && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The sentencepiece requirement is quoted: in a shell-form RUN an unquoted
 # `>=0.1.96` is parsed as a redirection of stdout to a file named `=0.1.96`,
 # which drops the version constraint and swallows pip's output.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Clone icefall into /workspace/icefall and install its Python requirements.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 # Make the icefall checkout importable and start containers inside it.
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch1.13.0-cuda11.6.dockerfile
+++ b/docker/torch1.13.0-cuda11.6.dockerfile
@ -0,0 +1,74 @@
 # icefall image built on the PyTorch 1.13.0 / CUDA 11.6 / cuDNN 8 runtime base.
 FROM pytorch/pytorch:1.13.0-cuda11.6-cudnn8-runtime
 ENV LC_ALL=C.UTF-8
 ARG DEBIAN_FRONTEND=noninteractive
 # python 3.9
 # Pinned wheel versions; each can be overridden with --build-arg.
 ARG K2_VERSION="1.24.4.dev20240223+cuda11.6.torch1.13.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.6.torch1.13.0"
 ARG TORCHAUDIO_VERSION="0.13.0+cu116"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System packages (listed alphabetically); the apt lists are removed in the
 # same layer.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         curl \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libssl-dev \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         vim \
         wget \
         zlib1g-dev \
         && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The sentencepiece requirement is quoted: in a shell-form RUN an unquoted
 # `>=0.1.96` is parsed as a redirection of stdout to a file named `=0.1.96`,
 # which drops the version constraint and swallows pip's output.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Clone icefall into /workspace/icefall and install its Python requirements.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 # Make the icefall checkout importable.
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 # Prepend the conda stubs directory to the dynamic linker search path.
 # NOTE(review): presumably needed because this is a -runtime base image - confirm.
 ENV LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH
 WORKDIR /workspace/icefall
--- a/docker/torch1.9.0-cuda10.2.dockerfile
+++ b/docker/torch1.9.0-cuda10.2.dockerfile
@ -0,0 +1,88 @@
 FROM pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Base image ships python 3.7.
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda10.2.torch1.9.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda10.2.torch1.9.0"
 ARG TORCHAUDIO_VERSION="0.9.0"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # Remove the NVIDIA apt source lists and repository key 7fa2af80;
 # see https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key/
 RUN rm /etc/apt/sources.list.d/cuda.list && \
     rm /etc/apt/sources.list.d/nvidia-ml.list && \
     apt-key del 7fa2af80
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install the cuda-keyring package, then check that `apt-get update` succeeds.
 RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
     dpkg -i cuda-keyring_1.0-1_all.deb && \
     rm -v cuda-keyring_1.0-1_all.deb && \
     apt-get update && \
     rm -rf /var/lib/apt/lists/*
 # Install Python dependencies. tqdm is uninstalled first and then reinstalled
 # with a minimum version.
 # Version specifiers are quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip uninstall -y tqdm && \
     pip install -U --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torch_stable.html \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz \
       "tqdm>=4.63.0"
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.0.0-cuda11.7.dockerfile
+++ b/docker/torch2.0.0-cuda11.7.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda11.7.torch2.0.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.7.torch2.0.0"
 ARG TORCHAUDIO_VERSION="2.0.0+cu117"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.1.0-cuda11.8.dockerfile
+++ b/docker/torch2.1.0-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda11.8.torch2.1.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.8.torch2.1.0"
 ARG TORCHAUDIO_VERSION="2.1.0+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.1.0-cuda12.1.dockerfile
+++ b/docker/torch2.1.0-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda12.1.torch2.1.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda12.1.torch2.1.0"
 ARG TORCHAUDIO_VERSION="2.1.0+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.2.0-cuda11.8.dockerfile
+++ b/docker/torch2.2.0-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.2.0-cuda11.8-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda11.8.torch2.2.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.8.torch2.2.0"
 ARG TORCHAUDIO_VERSION="2.2.0+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.2.0-cuda12.1.dockerfile
+++ b/docker/torch2.2.0-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.2.0-cuda12.1-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda12.1.torch2.2.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda12.1.torch2.2.0"
 ARG TORCHAUDIO_VERSION="2.2.0+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.2.1-cuda11.8.dockerfile
+++ b/docker/torch2.2.1-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.2.1-cuda11.8-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda11.8.torch2.2.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda11.8.torch2.2.1"
 ARG TORCHAUDIO_VERSION="2.2.1+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.2.1-cuda12.1.dockerfile
+++ b/docker/torch2.2.1-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.2.1-cuda12.1-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240223+cuda12.1.torch2.2.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240223+cuda12.1.torch2.2.1"
 ARG TORCHAUDIO_VERSION="2.2.1+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.2.2-cuda11.8.dockerfile
+++ b/docker/torch2.2.2-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.2.2-cuda11.8-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240328+cuda11.8.torch2.2.2"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240329+cuda11.8.torch2.2.2"
 ARG TORCHAUDIO_VERSION="2.2.2+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.2.2-cuda12.1.dockerfile
+++ b/docker/torch2.2.2-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.2.2-cuda12.1-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240328+cuda12.1.torch2.2.2"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240329+cuda12.1.torch2.2.2"
 ARG TORCHAUDIO_VERSION="2.2.2+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.3.1-cuda11.8.dockerfile
+++ b/docker/torch2.3.1-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.3.1-cuda11.8-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240606+cuda11.8.torch2.3.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240606+cuda11.8.torch2.3.1"
 ARG TORCHAUDIO_VERSION="2.3.1+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.3.1-cuda12.1.dockerfile
+++ b/docker/torch2.3.1-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240606+cuda12.1.torch2.3.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240606+cuda12.1.torch2.3.1"
 ARG TORCHAUDIO_VERSION="2.3.1+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.4.0-cuda11.8.dockerfile
+++ b/docker/torch2.4.0-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.4.0-cuda11.8-cudnn9-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240725+cuda11.8.torch2.4.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240725+cuda11.8.torch2.4.0"
 ARG TORCHAUDIO_VERSION="2.4.0+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.4.0-cuda12.1.dockerfile
+++ b/docker/torch2.4.0-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240725+cuda12.1.torch2.4.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240725+cuda12.1.torch2.4.0"
 ARG TORCHAUDIO_VERSION="2.4.0+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.4.0-cuda12.4.dockerfile
+++ b/docker/torch2.4.0-cuda12.4.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240725+cuda12.4.torch2.4.0"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240725+cuda12.4.torch2.4.0"
 ARG TORCHAUDIO_VERSION="2.4.0+cu124"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.4.1-cuda11.8.dockerfile
+++ b/docker/torch2.4.1-cuda11.8.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.4.1-cuda11.8-cudnn9-devel
 # Base image ships python 3.10.
 ENV LC_ALL=C.UTF-8
 # Build-time only: stops apt-get from prompting during the build.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; their local tags match the base image's CUDA/torch.
 ARG K2_VERSION="1.24.4.dev20240905+cuda11.8.torch2.4.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda11.8.torch2.4.1"
 ARG TORCHAUDIO_VERSION="2.4.1+cu118"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System build tools and audio utilities; the apt lists are removed in the
 # same layer to keep the image small.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
     && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # The version specifier is quoted because RUN goes through a shell, where a
 # bare `>=` is an output redirection rather than part of the requirement.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the packages listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.4.1-cuda12.1.dockerfile
+++ b/docker/torch2.4.1-cuda12.1.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.4.1-cuda12.1-cudnn9-devel
 # python 3.10
 # Use a UTF-8 locale for the remaining build steps and at container runtime.
 ENV LC_ALL=C.UTF-8
 # Build-time only: keeps apt-get from prompting during package installation.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; the version strings encode the CUDA and torch
 # versions of the base image above, so update them together with FROM.
 ARG K2_VERSION="1.24.4.dev20240905+cuda12.1.torch2.4.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.1.torch2.4.1"
 ARG TORCHAUDIO_VERSION="2.4.1+cu121"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System tools and build dependencies. The apt package lists are removed in
 # the same layer so that they do not end up in the image.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
         && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # NOTE: the sentencepiece requirement is quoted on purpose. This RUN is executed
 # by /bin/sh, and an unquoted `>=0.1.96` is parsed as an output redirection to a
 # file named `=0.1.96`, which silently drops the version constraint.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the requirements listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 # Put the cloned checkout on the Python module search path.
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docker/torch2.4.1-cuda12.4.dockerfile
+++ b/docker/torch2.4.1-cuda12.4.dockerfile
@ -0,0 +1,73 @@
 FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-devel
 # python 3.10
 # Use a UTF-8 locale for the remaining build steps and at container runtime.
 ENV LC_ALL=C.UTF-8
 # Build-time only: keeps apt-get from prompting during package installation.
 ARG DEBIAN_FRONTEND=noninteractive
 # Pinned wheel versions; the version strings encode the CUDA and torch
 # versions of the base image above, so update them together with FROM.
 ARG K2_VERSION="1.24.4.dev20240905+cuda12.4.torch2.4.1"
 ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.4.torch2.4.1"
 ARG TORCHAUDIO_VERSION="2.4.1+cu124"
 LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
 LABEL k2_version=${K2_VERSION}
 LABEL kaldifeat_version=${KALDIFEAT_VERSION}
 LABEL github_repo="https://github.com/k2-fsa/icefall"
 # System tools and build dependencies. The apt package lists are removed in
 # the same layer so that they do not end up in the image.
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
         curl \
         vim \
         libssl-dev \
         autoconf \
         automake \
         bzip2 \
         ca-certificates \
         ffmpeg \
         g++ \
         gfortran \
         git \
         libtool \
         make \
         patch \
         sox \
         subversion \
         unzip \
         valgrind \
         wget \
         zlib1g-dev \
         && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies.
 # NOTE: the sentencepiece requirement is quoted on purpose. This RUN is executed
 # by /bin/sh, and an unquoted `>=0.1.96` is parsed as an output redirection to a
 # file named `=0.1.96`, which silently drops the version constraint.
 RUN pip install --no-cache-dir \
       torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \
       k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \
       git+https://github.com/lhotse-speech/lhotse \
       kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \
       kaldi_native_io \
       kaldialign \
       kaldifst \
       kaldilm \
       "sentencepiece>=0.1.96" \
       tensorboard \
       typeguard \
       dill \
       onnx \
       onnxruntime \
       onnxmltools \
       onnxoptimizer \
       onnxsim \
       multi_quantization \
       numpy \
       pytest \
       graphviz
 # Fetch icefall and install the requirements listed in its requirements.txt.
 RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
     cd /workspace/icefall && \
     pip install --no-cache-dir -r requirements.txt
 # Put the cloned checkout on the Python module search path.
 ENV PYTHONPATH=/workspace/icefall:$PYTHONPATH
 WORKDIR /workspace/icefall
--- a/docs/README.md
+++ b/docs/README.md
@ -0,0 +1,24 @@
 ## Usage
 ```bash
 cd /path/to/icefall/docs
 pip install -r requirements.txt
 make clean
 make html
 cd build/html
 python3 -m http.server 8000
 ```
 It prints:
 ```
 Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
 ```
 Open your browser and go to <http://0.0.0.0:8000/> to view the generated
 documentation.
 Done!
 **Hint**: You can change the port number when starting the server.
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@ -1,2 +1,3 @@
 sphinx_rtd_theme
 sphinx
 sphinxcontrib-youtube==1.1.0
--- a/docs/source/_static/kaldi-align/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav
+++ b/docs/source/_static/kaldi-align/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav
--- a/docs/source/_static/kaldi-align/at.wav
+++ b/docs/source/_static/kaldi-align/at.wav
--- a/docs/source/_static/kaldi-align/beside.wav
+++ b/docs/source/_static/kaldi-align/beside.wav
--- a/docs/source/_static/kaldi-align/curiosity.wav
+++ b/docs/source/_static/kaldi-align/curiosity.wav
--- a/docs/source/_static/kaldi-align/had.wav
+++ b/docs/source/_static/kaldi-align/had.wav
--- a/docs/source/_static/kaldi-align/i.wav
+++ b/docs/source/_static/kaldi-align/i.wav
--- a/docs/source/_static/kaldi-align/me.wav
+++ b/docs/source/_static/kaldi-align/me.wav
--- a/docs/source/_static/kaldi-align/moment.wav
+++ b/docs/source/_static/kaldi-align/moment.wav
--- a/docs/source/_static/kaldi-align/that.wav
+++ b/docs/source/_static/kaldi-align/that.wav
--- a/docs/source/_static/kaldi-align/this.wav
+++ b/docs/source/_static/kaldi-align/this.wav
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -16,7 +16,6 @@
 import sphinx_rtd_theme
 # -- Project information -----------------------------------------------------
 project = "icefall"
@ -33,7 +32,9 @@ release = "0.1"
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
    "sphinx.ext.todo",
    "sphinx_rtd_theme",
    "sphinxcontrib.youtube",
 ]
 # Add any paths that contain templates here, relative to this directory.
@ -73,5 +74,30 @@ html_context = {
    "github_user": "k2-fsa",
    "github_repo": "icefall",
    "github_version": "master",
-    "conf_py_path": "/icefall/docs/source/",
+    "conf_py_path": "/docs/source/",
 }
 todo_include_todos = True
 rst_epilog = """
 .. _sherpa-ncnn: https://github.com/k2-fsa/sherpa-ncnn
 .. _sherpa-onnx: https://github.com/k2-fsa/sherpa-onnx
 .. _icefall: https://github.com/k2-fsa/icefall
 .. _git-lfs: https://git-lfs.com/
 .. _ncnn: https://github.com/tencent/ncnn
 .. _LibriSpeech: https://www.openslr.org/12
 .. _Gigaspeech: https://github.com/SpeechColab/GigaSpeech
 .. _musan: http://www.openslr.org/17/
 .. _ONNX: https://github.com/onnx/onnx
 .. _onnxruntime: https://github.com/microsoft/onnxruntime
 .. _torch: https://github.com/pytorch/pytorch
 .. _torchaudio: https://github.com/pytorch/audio
 .. _k2: https://github.com/k2-fsa/k2
 .. _lhotse: https://github.com/lhotse-speech/lhotse
 .. _yesno: https://www.openslr.org/1/
 .. _Next-gen Kaldi: https://github.com/k2-fsa
 .. _Kaldi: https://github.com/kaldi-asr/kaldi
 .. _lilcom: https://github.com/danpovey/lilcom
 .. _CTC: https://www.cs.toronto.edu/~graves/icml_2006.pdf
 .. _kaldi-decoder: https://github.com/k2-fsa/kaldi-decoder
 """
--- a/docs/source/contributing/code-style.rst
+++ b/docs/source/contributing/code-style.rst
@ -11,9 +11,9 @@ We use the following tools to make the code style to be as consistent as possibl
 The following versions of the above tools are used:
-  - ``black == 12.6b0``
+  - ``black == 22.3.0``
-  - ``flake8 == 3.9.2``
+  - ``flake8 == 5.0.4``
-  - ``isort == 5.9.2``
+  - ``isort == 5.10.1``
 After running the following commands:
@ -38,7 +38,7 @@ Please fix any issues reported by the check tools.
 .. HINT::
  Some of the check tools, i.e., ``black`` and ``isort`` will modify
-  the files to be commited **in-place**. So please run ``git status``
+  the files to be committed **in-place**. So please run ``git status``
  after failure to see which file has been modified by the tools
  before you make any further changes.
@ -54,10 +54,17 @@ it should succeed this time:
 If you want to check the style of your code before ``git commit``, you
 can do the following:
  .. code-block:: bash
    $ pre-commit install
    $ pre-commit run
 Or without installing the pre-commit hooks:
  .. code-block:: bash
    $ cd icefall
-    $ pip install black==21.6b0 flake8==3.9.2 isort==5.9.2
+    $ pip install black==22.3.0 flake8==5.0.4 isort==5.10.1
    $ black --check your_changed_file.py
    $ black your_changed_file.py  # modify it in-place
    $
--- a/docs/source/contributing/how-to-create-a-recipe.rst
+++ b/docs/source/contributing/how-to-create-a-recipe.rst
@ -3,7 +3,7 @@ How to create a recipe
 .. HINT::
-  Please read :ref:`follow the code style` to adjust your code sytle.
+  Please read :ref:`follow the code style` to adjust your code style.
 .. CAUTION::
--- a/docs/source/decoding-with-langugage-models/LODR.rst
+++ b/docs/source/decoding-with-langugage-models/LODR.rst
@ -0,0 +1,187 @@
 .. _LODR:
 LODR for RNN Transducer
 =======================
 As a type of E2E model, neural transducers are usually considered as having an internal
 language model, which learns the language level information on the training corpus.
 In real-life scenarios, there is often a mismatch between the training corpus and the target corpus space.
 This mismatch can be a problem when decoding neural transducer models with external language models, as the internal
 language model can act "against" the external LM. In this tutorial, we show how to use
 `Low-order Density Ratio <https://arxiv.org/abs/2203.16776>`_ to alleviate this effect and further improve the performance
 of language model integration.
 .. note::
    This tutorial is based on the recipe
    `pruned_transducer_stateless7_streaming <https://github.com/k2-fsa/icefall/tree/master/egs/librispeech/ASR/pruned_transducer_stateless7_streaming>`_,
    which is a streaming transducer model trained on `LibriSpeech`_.
    However, you can easily apply LODR to other recipes.
    If you encounter any problems, please open an issue here `icefall <https://github.com/k2-fsa/icefall/issues>`__.
 .. note::
    For simplicity, the training and testing corpus in this tutorial are the same (`LibriSpeech`_). However,
    you can change the testing set to any other domain (e.g., `GigaSpeech`_) and prepare the language models
    using that corpus.
 First, let's have a look at some background information. As the predecessor of LODR, Density Ratio (DR) was first proposed `here <https://arxiv.org/abs/2002.11268>`_
 to address the language information mismatch between the training
 corpus (source domain) and the testing corpus (target domain). Assuming that the source domain and the test domain
 are acoustically similar, DR derives the following formula for decoding with Bayes' theorem:
 .. math::
    \text{score}\left(y_u|\mathit{x},y\right) =
    \log p\left(y_u|\mathit{x},y_{1:u-1}\right) +
    \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -
    \lambda_2 \log p_{\text{Source LM}}\left(y_u|\mathit{x},y_{1:u-1}\right)
 where :math:`\lambda_1` and :math:`\lambda_2` are the weights of LM scores for target domain and source domain respectively.
 Here, the source domain LM is trained on the training corpus. The only difference in the above formula compared to
 shallow fusion is the subtraction of the source domain LM.
 Some works treat the predictor and the joiner of the neural transducer as its internal LM. However, the LM is
 considered to be weak and can only capture low-level language information. Therefore, `LODR <https://arxiv.org/abs/2203.16776>`__ proposed to use
 a low-order n-gram LM as an approximation of the ILM of the neural transducer. This leads to the following formula
 during decoding for transducer model:
 .. math::
    \text{score}\left(y_u|\mathit{x},y\right) =
    \log p_{rnnt}\left(y_u|\mathit{x},y_{1:u-1}\right) +
    \lambda_1 \log p_{\text{Target LM}}\left(y_u|\mathit{x},y_{1:u-1}\right) -
    \lambda_2 \log p_{\text{bi-gram}}\left(y_u|\mathit{x},y_{1:u-1}\right)
 In LODR, an additional bi-gram LM estimated on the source domain (e.g., the training corpus) is required. Compared to DR,
 the only difference lies in the choice of source domain LM. According to the original `paper <https://arxiv.org/abs/2203.16776>`_,
 LODR achieves similar performance compared to DR in both intra-domain and cross-domain settings.
 As a bi-gram is much faster to evaluate, LODR is usually much faster.
 Now, we will show you how to use LODR in ``icefall``.
 For illustration purposes, we will use a pre-trained ASR model from this `link <https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29>`_.
 If you want to train your model from scratch, please have a look at :ref:`non_streaming_librispeech_pruned_transducer_stateless`.
 The testing scenario here is intra-domain (we decode the model trained on `LibriSpeech`_ on `LibriSpeech`_ testing sets).
 As the initial step, let's download the pre-trained model.
 .. code-block:: bash
    $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
    $ cd icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
    $ git lfs pull --include "pretrained.pt"
    $ ln -s pretrained.pt epoch-99.pt # create a symbolic link so that the checkpoint can be loaded
    $ cd ../data/lang_bpe_500
    $ git lfs pull --include bpe.model
    $ cd ../../..
 To test the model, let's have a look at the decoding results **without** using LM. This can be done via the following command:
 .. code-block:: bash
    $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp/
    $ ./pruned_transducer_stateless7_streaming/decode.py \
        --epoch 99 \
        --avg 1 \
        --use-averaged-model False \
        --exp-dir $exp_dir \
        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
        --max-duration 600 \
        --decode-chunk-len 32 \
        --decoding-method modified_beam_search
 The following WERs are achieved on test-clean and test-other:
 .. code-block:: text
    $ For test-clean, WER of different settings are:
    $ beam_size_4	3.11	best for test-clean
    $ For test-other, WER of different settings are:
    $ beam_size_4	7.93	best for test-other
 Then, we download the external language model and bi-gram LM that are necessary for LODR.
 Note that the bi-gram is estimated on the LibriSpeech 960 hours' text.
 .. code-block:: bash
    $ # download the external LM
    $ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
    $ # create a symbolic link so that the checkpoint can be loaded
    $ pushd icefall-librispeech-rnn-lm/exp
    $ git lfs pull --include "pretrained.pt"
    $ ln -s pretrained.pt epoch-99.pt
    $ popd
    $
    $ # download the bi-gram
    $ git lfs install
    $ git clone https://huggingface.co/marcoyang/librispeech_bigram
    $ pushd data/lang_bpe_500
    $ ln -s ../../librispeech_bigram/2gram.fst.txt .
    $ popd
 Then, we perform LODR decoding by setting ``--decoding-method`` to ``modified_beam_search_LODR``:
 .. code-block:: bash
    $ exp_dir=./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/exp
    $ lm_dir=./icefall-librispeech-rnn-lm/exp
    $ lm_scale=0.42
    $ LODR_scale=-0.24
    $ ./pruned_transducer_stateless7_streaming/decode.py \
        --epoch 99 \
        --avg 1 \
        --use-averaged-model False \
        --beam-size 4 \
        --exp-dir $exp_dir \
        --max-duration 600 \
        --decode-chunk-len 32 \
        --decoding-method modified_beam_search_LODR \
        --bpe-model ./icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29/data/lang_bpe_500/bpe.model \
        --use-shallow-fusion 1 \
        --lm-type rnn \
        --lm-exp-dir $lm_dir \
        --lm-epoch 99 \
        --lm-scale $lm_scale \
        --lm-avg 1 \
        --rnn-lm-embedding-dim 2048 \
        --rnn-lm-hidden-dim 2048 \
        --rnn-lm-num-layers 3 \
        --lm-vocab-size 500 \
        --tokens-ngram 2 \
        --ngram-lm-scale $LODR_scale
 There are two extra arguments that need to be given when doing LODR. ``--tokens-ngram`` specifies the order of the n-gram. As we
 are using a bi-gram, we set it to 2. ``--ngram-lm-scale`` is the scale of the bi-gram; it should be a negative number
 because we are subtracting the bi-gram's score during decoding.
 The decoding results obtained with the above command are shown below:
 .. code-block:: text
    $ For test-clean, WER of different settings are:
    $ beam_size_4	2.61	best for test-clean
    $ For test-other, WER of different settings are:
    $ beam_size_4	6.74	best for test-other
 Recall that the lowest WER we obtained in :ref:`shallow_fusion` with a beam size of 4 is ``2.77/7.08``. LODR
 indeed **further improves** the WER. We can do even better if we increase ``--beam-size``:
 .. list-table:: WER of LODR with different beam sizes
   :widths: 25 25 50
   :header-rows: 1
   * - Beam size
     - test-clean
     - test-other
   * - 4
     - 2.61
     - 6.74
   * - 8
     - 2.45
     - 6.38
   * - 12
     - 2.4
     - 6.23
--- a/Show More
+++ b/Show More