Merge remote-tracking branch 'dan/master' into rknn-zipformer2

k2-fsa committed 2025-07-03 11:04:05 +08:00 (commit 7e8e6a60b2)
208 changed files with 2142 additions and 1553 deletions


@ -55,9 +55,9 @@ RUN pip install --no-cache-dir \
"numpy<2.0" \
onnxoptimizer \
onnxsim \
onnx \
onnx==1.17.0 \
onnxmltools \
onnxruntime \
onnxruntime==1.17.1 \
piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
pypinyin==0.50.0 \
pytest \
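Since onnx and onnxruntime are now pinned (1.17.0 / 1.17.1), a small post-install check can guard against a later layer silently upgrading them. The snippet below is a hypothetical sanity check, not part of the Dockerfile; the expected version strings are taken from the hunk above.

# Hypothetical check, not part of the image: verify the pinned versions
# actually ended up installed.
import onnx
import onnxruntime

assert onnx.__version__ == "1.17.0", onnx.__version__
assert onnxruntime.__version__ == "1.17.1", onnxruntime.__version__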


@ -63,23 +63,24 @@ def get_torchaudio_version(torch_version):
def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
k2_version = "1.24.4.dev20241029"
kaldifeat_version = "1.25.5.dev20241029"
version = "20241218"
k2_version = "1.24.4.dev20250630"
kaldifeat_version = "1.25.5.dev20250630"
version = "20250630"
# torchaudio 2.5.0 does not support python 3.13
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
torch_version = []
torch_version += ["1.13.0", "1.13.1"]
torch_version += ["2.0.0", "2.0.1"]
# torch_version += ["2.1.0", "2.1.1", "2.1.2"]
# torch_version += ["2.2.0", "2.2.1", "2.2.2"]
torch_version += ["2.1.0", "2.1.1", "2.1.2"]
torch_version += ["2.2.0", "2.2.1", "2.2.2"]
# Test only torch >= 2.3.0
torch_version += ["2.3.0", "2.3.1"]
torch_version += ["2.4.0"]
torch_version += ["2.4.1"]
torch_version += ["2.5.0"]
torch_version += ["2.5.1"]
torch_version += ["2.6.0", "2.7.0", "2.7.1"]
if specified_torch_version:
torch_version = [specified_torch_version]
@ -109,10 +110,6 @@ def get_matrix(min_torch_version, specified_torch_version, specified_python_vers
# torch>=2.5 requires python 3.10
continue
if t == "2.5.1":
k2_version_2 = "1.24.4.dev20241122"
kaldifeat_version_2 = "1.25.5.dev20241126"
else:
k2_version_2 = k2_version
kaldifeat_version_2 = kaldifeat_version
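For context, the matrix this script emits is consumed as ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }} in the workflows further down. A minimal sketch of how such entries could be assembled is shown below; the field names follow the workflow usage (matrix.python-version, matrix.torch-version, matrix.version), while the k2/kaldifeat fields and the exact output shape are assumptions, not the real script.

# Sketch only: the real generate_build_matrix.py is more involved.
import json

def _minor(v: str):
    # "2.5.1" -> (2, 5); enough for the version comparisons used here
    return tuple(int(x) for x in v.split(".")[:2])

def build_matrix(python_versions, torch_versions, k2_version, kaldifeat_version, version):
    entries = []
    for p in python_versions:
        for t in torch_versions:
            # torch>=2.5 requires python 3.10 (see the comment in the hunk above)
            if _minor(t) >= (2, 5) and _minor(p) < (3, 10):
                continue
            entries.append({
                "python-version": p,
                "torch-version": t,
                "k2-version": k2_version,
                "kaldifeat-version": kaldifeat_version,
                "version": version,
            })
    return json.dumps(entries)

print(build_matrix(["3.10", "3.13"], ["2.3.0", "2.7.1"],
                   "1.24.4.dev20250630", "1.25.5.dev20250630", "20250630"))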


@ -1,7 +1,7 @@
#!/usr/bin/env python3
def main():
def get_v1_2_0_files():
prefix = (
"https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
)
@ -19,9 +19,70 @@ def main():
"piper_phonemize-1.2.0-cp39-cp39-macosx_10_14_x86_64.whl",
"piper_phonemize-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
]
ans = [prefix + f for f in files]
ans.sort()
return ans
def get_v1_3_0_files():
prefix = (
"https://github.com/csukuangfj/piper-phonemize/releases/download/2025.06.23/"
)
files = [
"piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_universal2.whl",
"piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl",
"piper_phonemize-1.3.0-cp310-cp310-macosx_11_0_arm64.whl",
"piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl",
"piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"piper_phonemize-1.3.0-cp310-cp310-win_amd64.whl",
"piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_universal2.whl",
"piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl",
"piper_phonemize-1.3.0-cp311-cp311-macosx_11_0_arm64.whl",
"piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl",
"piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"piper_phonemize-1.3.0-cp311-cp311-win_amd64.whl",
"piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_universal2.whl",
"piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_x86_64.whl",
"piper_phonemize-1.3.0-cp312-cp312-macosx_11_0_arm64.whl",
"piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl",
"piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"piper_phonemize-1.3.0-cp312-cp312-win_amd64.whl",
"piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_universal2.whl",
"piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl",
"piper_phonemize-1.3.0-cp313-cp313-macosx_11_0_arm64.whl",
"piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl",
"piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"piper_phonemize-1.3.0-cp313-cp313-win_amd64.whl",
"piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_universal2.whl",
"piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl",
"piper_phonemize-1.3.0-cp38-cp38-macosx_11_0_arm64.whl",
"piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl",
"piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"piper_phonemize-1.3.0-cp38-cp38-win_amd64.whl",
"piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_universal2.whl",
"piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl",
"piper_phonemize-1.3.0-cp39-cp39-macosx_11_0_arm64.whl",
"piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
"piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl",
"piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
"piper_phonemize-1.3.0-cp39-cp39-win_amd64.whl",
]
ans = [prefix + f for f in files]
ans.sort()
return ans
def main():
files = get_v1_3_0_files() + get_v1_2_0_files()
with open("piper_phonemize.html", "w") as f:
for file in files:
url = prefix + file
for url in files:
file = url.split("/")[-1]
f.write(f'<a href="{url}">{file}</a><br/>\n')
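The generated piper_phonemize.html is the find-links page referenced by the Dockerfile line above (pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html), so adding the 2025.06.23 release here is what makes the 1.3.0 wheels, including the cp313 builds, resolvable. A quick sanity check of the generated index might look like the hypothetical snippet below (the assertions are assumptions, not part of the script).

# Hypothetical check of the generated index: every link should be a
# piper_phonemize wheel, and the 1.3.0 release should include cp313 builds.
with open("piper_phonemize.html") as f:
    urls = [line.split('"')[1] for line in f if "href" in line]

assert all("piper_phonemize-" in u and u.endswith(".whl") for u in urls)
assert any("1.3.0" in u and "cp313" in u for u in urls)
print(f"{len(urls)} wheel links written")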


@ -1,200 +0,0 @@
#!/usr/bin/env bash
set -ex
git config --global user.name "k2-fsa"
git config --global user.email "csukuangfj@gmail.com"
git config --global lfs.allowincompletepush true
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
log "pwd: $PWD"
cd egs/multi_zh-hans/ASR
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
cd exp
git lfs pull --include pretrained.pt
ln -s pretrained.pt epoch-99.pt
cd ../data/lang_bpe_2000
ls -lh
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
git lfs pull --include "*.model"
ls -lh
popd
log "--------------------------------------------"
log "Export non-streaming ONNX transducer models "
log "--------------------------------------------"
./zipformer/export-onnx.py \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--causal False
ls -lh $repo/exp
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav \
$repo/test_wavs/DEV_T0000000001.wav \
$repo/test_wavs/DEV_T0000000002.wav \
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
$repo/test_wavs/TEST_MEETING_T0000000351.wav
rm -rf $repo
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
cd exp/
git lfs pull --include pretrained.pt
rm -fv epoch-20.pt
rm -fv *.onnx
ln -s pretrained.pt epoch-20.pt
cd ../data/lang_bpe_2000
ls -lh
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
git lfs pull --include "*.model"
ls -lh
popd
log "----------------------------------------"
log "Export streaming ONNX CTC models "
log "----------------------------------------"
./zipformer/export-onnx-streaming-ctc.py \
--exp-dir $repo/exp \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 20 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 1
ls -lh $repo/exp/
log "------------------------------------------------------------"
log "Test exported streaming ONNX CTC models (greedy search) "
log "------------------------------------------------------------"
test_wavs=(
DEV_T0000000000.wav
DEV_T0000000001.wav
DEV_T0000000002.wav
TEST_MEETING_T0000000113.wav
TEST_MEETING_T0000000219.wav
TEST_MEETING_T0000000351.wav
)
for w in ${test_wavs[@]}; do
./zipformer/onnx_pretrained-streaming-ctc.py \
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/$w
done
log "Upload onnx CTC models to huggingface"
url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
cp -v $repo/exp/ctc*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "bpe.model"
ls -lh
file bpe.model
git status
git add .
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
log "----------------------------------------"
log "Export streaming ONNX transducer models "
log "----------------------------------------"
./zipformer/export-onnx-streaming.py \
--exp-dir $repo/exp \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 20 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 0
ls -lh $repo/exp
log "------------------------------------------------------------"
log "Test exported streaming ONNX transducer models (Python code)"
log "------------------------------------------------------------"
log "test fp32"
./zipformer/onnx_pretrained-streaming.py \
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav
log "test int8"
./zipformer/onnx_pretrained-streaming.py \
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav
log "Upload onnx transducer models to huggingface"
url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
cp -v $repo/exp/encoder*.onnx $dst
cp -v $repo/exp/decoder*.onnx $dst
cp -v $repo/exp/joiner*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" bpe.model
git add .
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../

.github/scripts/multi_zh-hans/ASR/run.sh (new vendored executable file, 756 lines)

@ -0,0 +1,756 @@
#!/usr/bin/env bash
set -ex
git config --global user.name "k2-fsa"
git config --global user.email "csukuangfj@gmail.com"
git config --global lfs.allowincompletepush true
python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
cd egs/multi_zh-hans/ASR
log "pwd: $PWD"
function run_2023_9_2() {
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
cd exp
git lfs pull --include pretrained.pt
ln -s pretrained.pt epoch-99.pt
cd ../data/lang_bpe_2000
ls -lh
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
git lfs pull --include "*.model"
ls -lh
popd
log "--------------------------------------------"
log "Export non-streaming ONNX transducer models "
log "--------------------------------------------"
./zipformer/export-onnx.py \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--use-averaged-model 0 \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--causal False \
--fp16 1
ls -lh $repo/exp
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav \
$repo/test_wavs/DEV_T0000000001.wav \
$repo/test_wavs/DEV_T0000000002.wav \
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
$repo/test_wavs/TEST_MEETING_T0000000351.wav
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.int8.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.int8.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav \
$repo/test_wavs/DEV_T0000000001.wav \
$repo/test_wavs/DEV_T0000000002.wav \
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
$repo/test_wavs/TEST_MEETING_T0000000351.wav
./zipformer/onnx_pretrained.py \
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.fp16.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.fp16.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.fp16.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav \
$repo/test_wavs/DEV_T0000000001.wav \
$repo/test_wavs/DEV_T0000000002.wav \
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
$repo/test_wavs/TEST_MEETING_T0000000351.wav
rm -rf $repo
}
function run_2023_11_05_streaming() {
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
cd exp/
git lfs pull --include pretrained.pt
rm -fv epoch-20.pt
rm -fv *.onnx
ln -s pretrained.pt epoch-20.pt
cd ../data/lang_bpe_2000
ls -lh
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
git lfs pull --include "*.model"
ls -lh
popd
log "----------------------------------------"
log "Export streaming ONNX CTC models "
log "----------------------------------------"
./zipformer/export-onnx-streaming-ctc.py \
--exp-dir $repo/exp \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 20 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 1 \
--fp16 1
ls -lh $repo/exp/
log "------------------------------------------------------------"
log "Test exported streaming ONNX CTC models (greedy search) "
log "------------------------------------------------------------"
test_wavs=(
DEV_T0000000000.wav
DEV_T0000000001.wav
DEV_T0000000002.wav
TEST_MEETING_T0000000113.wav
TEST_MEETING_T0000000219.wav
TEST_MEETING_T0000000351.wav
)
for w in ${test_wavs[@]}; do
log "----fp32----"
./zipformer/onnx_pretrained-streaming-ctc.py \
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/$w
log "----int8----"
./zipformer/onnx_pretrained-streaming-ctc.py \
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/$w
log "----fp16----"
./zipformer/onnx_pretrained-streaming-ctc.py \
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/$w
done
log "Upload onnx CTC models to huggingface"
name=(
sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
)
for n in ${name[@]}; do
url=https://huggingface.co/k2-fsa/$n
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
if [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]]; then
cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 ]]; then
cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 ]]; then
cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
fi
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "bpe.model" "*.wav"
ls -lh
file bpe.model
git status
git add .
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
done
log "----------------------------------------"
log "Export streaming ONNX transducer models "
log "----------------------------------------"
./zipformer/export-onnx-streaming.py \
--exp-dir $repo/exp \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 20 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 0 \
--fp16 1
ls -lh $repo/exp
log "------------------------------------------------------------"
log "Test exported streaming ONNX transducer models (Python code)"
log "------------------------------------------------------------"
log "test fp32"
./zipformer/onnx_pretrained-streaming.py \
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav
log "test int8"
./zipformer/onnx_pretrained-streaming.py \
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav
log "test fp16"
./zipformer/onnx_pretrained-streaming.py \
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
$repo/test_wavs/DEV_T0000000000.wav
name=(
sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
)
for n in ${name[@]}; do
url=https://huggingface.co/csukuangfj/$n
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
if [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 ]]; then
cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 ]]; then
cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 ]]; then
cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
fi
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "bpe.model" "*.wav"
ls -lh
file bpe.model
git status
git add .
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
done
}
function run_2023_12_12_streaming() {
log "Upload onnx transducer models to huggingface"
url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
cp -v $repo/exp/encoder*.onnx $dst
cp -v $repo/exp/decoder*.onnx $dst
cp -v $repo/exp/joiner*.onnx $dst
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" bpe.model "*.wav"
git add .
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
}
function run_yuekai_large() {
repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include pretrained.pt
mv pretrained.pt epoch-99.pt
curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
popd
log "----------------------------------------"
log "Export streaming ONNX CTC models "
log "----------------------------------------"
./zipformer/export-onnx-streaming-ctc.py \
--exp-dir $repo/ \
--tokens $repo/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 99 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 1 \
\
--num-encoder-layers 2,2,4,5,4,2 \
--feedforward-dim 768,1024,1536,2048,1536,768 \
--encoder-dim 256,384,512,768,512,256 \
--encoder-unmasked-dim 192,192,256,320,256,192 \
\
--fp16 1 \
--use-whisper-features 1
ls -lh $repo/
pushd $repo
cat >README.md <<EOF
# Introduction
This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-large-model
EOF
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.onnx model.onnx
ls -lh *.onnx
mkdir test_wavs
cd test_wavs
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
popd
for w in 0.wav 1.wav 8k.wav; do
log "---fp32---"
sherpa-onnx \
--zipformer2-ctc-model=$repo/model.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/$w
log "---int8---"
sherpa-onnx \
--zipformer2-ctc-model=$repo/model.int8.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/$w
log "---fp16---"
sherpa-onnx \
--zipformer2-ctc-model=$repo/model.fp16.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/$w
done
name=(
sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30
sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30
sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30
)
for n in ${name[@]}; do
url=https://huggingface.co/csukuangfj/$n
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30 ]]; then
cp -v $repo/model.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30 ]]; then
cp -v $repo/model.int8.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30 ]]; then
cp -v $repo/model.fp16.onnx $dst
fi
cp -v $repo/tokens.txt $dst
cp -v $repo/README.md $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "*.wav"
ls -lh
git status
git add .
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
done
rm $repo/*.onnx
log "----------------------------------------"
log "Export streaming ONNX transducer models "
log "----------------------------------------"
./zipformer/export-onnx-streaming.py \
--exp-dir $repo \
--tokens $repo/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 99 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 0 \
\
--num-encoder-layers 2,2,4,5,4,2 \
--feedforward-dim 768,1024,1536,2048,1536,768 \
--encoder-dim 256,384,512,768,512,256 \
--encoder-unmasked-dim 192,192,256,320,256,192 \
\
--fp16 1 \
--use-whisper-features 1
ls -lh $repo
pushd $repo
for m in encoder decoder joiner; do
mv -v $m-epoch-99-avg-1-chunk-16-left-128.onnx $m.onnx
mv -v $m-epoch-99-avg-1-chunk-16-left-128.fp16.onnx $m.fp16.onnx
mv -v $m-epoch-99-avg-1-chunk-16-left-128.int8.onnx $m.int8.onnx
done
ls -lh *.onnx
popd
for w in 0.wav 1.wav 8k.wav; do
log "---fp32---"
sherpa-onnx \
--encoder=$repo/encoder.onnx \
--decoder=$repo/decoder.onnx \
--joiner=$repo/joiner.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/$w
log "---int8---"
sherpa-onnx \
--encoder=$repo/encoder.int8.onnx \
--decoder=$repo/decoder.onnx \
--joiner=$repo/joiner.int8.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/$w
log "---fp16---"
sherpa-onnx \
--encoder=$repo/encoder.fp16.onnx \
--decoder=$repo/decoder.fp16.onnx \
--joiner=$repo/joiner.fp16.onnx \
--tokens=$repo/tokens.txt \
$repo/test_wavs/$w
done
name=(
sherpa-onnx-streaming-zipformer-zh-2025-06-30
sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30
sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30
)
for n in ${name[@]}; do
url=https://huggingface.co/csukuangfj/$n
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
if [[ $n == sherpa-onnx-streaming-zipformer-zh-2025-06-30 ]]; then
cp -v $repo/encoder.onnx $dst
cp -v $repo/decoder.onnx $dst
cp -v $repo/joiner.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30 ]]; then
cp -v $repo/encoder.int8.onnx $dst
cp -v $repo/decoder.onnx $dst
cp -v $repo/joiner.int8.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30 ]]; then
cp -v $repo/encoder.fp16.onnx $dst
cp -v $repo/decoder.fp16.onnx $dst
cp -v $repo/joiner.fp16.onnx $dst
fi
cp -v $repo/tokens.txt $dst
cp -v $repo/README.md $dst
mkdir -p $dst/test_wavs
cp -v $repo/test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "*.wav"
ls -lh
git status
git add .
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
done
}
function run_yuekai_xl() {
repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
log "Downloading pre-trained model from $repo_url"
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
repo=$(basename $repo_url)
pushd $repo
git lfs pull --include pretrained.pt
git lfs pull --include data/lang_bpe_2000/bpe.model
mv pretrained.pt epoch-99.pt
ls -lh *.pt
popd
log "----------------------------------------"
log "Export streaming ONNX CTC models "
log "----------------------------------------"
./zipformer/export-onnx-streaming-ctc.py \
--exp-dir $repo/ \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 99 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 1 \
\
--num-encoder-layers 2,3,5,6,5,3 \
--feedforward-dim 1536,2048,3072,4096,3072,1536 \
--encoder-dim 512,768,1024,1536,1024,512 \
--encoder-unmasked-dim 192,192,256,320,256,192 \
--decoder-dim 768 --joiner-dim 768 \
--value-head-dim 18 \
--query-head-dim 48 \
--num-heads 4,4,4,8,4,4 \
\
--fp16 1 \
--use-whisper-features 1 \
--use-external-data 1
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
ls -lh *.onnx
mkdir test_wavs
pushd test_wavs
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
popd
for w in 0.wav 1.wav 8k.wav; do
log "---int8---"
sherpa-onnx \
--zipformer2-ctc-model=./model.int8.onnx \
--tokens=$repo/data/lang_bpe_2000/tokens.txt \
test_wavs/$w
log "---fp16---"
sherpa-onnx \
--zipformer2-ctc-model=./model.fp16.onnx \
--tokens=$repo/data/lang_bpe_2000/tokens.txt \
test_wavs/$w
done
pushd $repo
cat >README.md <<EOF
# Introduction
This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-xl-model
EOF
popd
name=(
sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30
sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30
)
for n in ${name[@]}; do
url=https://huggingface.co/csukuangfj/$n
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30 ]]; then
cp -v model.fp16.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30 ]]; then
cp -v model.int8.onnx $dst
fi
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
cp -v $repo/README.md $dst
mkdir -p $dst/test_wavs
cp -v ./test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "*.wav" "bpe.model"
ls -lh
git status
git add .
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
ls -lh $dst
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
done
rm -fv *.onnx *.weights
log "----------------------------------------"
log "Export streaming ONNX transducer models "
log "----------------------------------------"
./zipformer/export-onnx-streaming.py \
--exp-dir $repo/ \
--tokens $repo/data/lang_bpe_2000/tokens.txt \
--causal 1 \
--avg 1 \
--epoch 99 \
--use-averaged-model 0 \
--chunk-size 16 \
--left-context-frames 128 \
--use-ctc 0 \
\
--num-encoder-layers 2,3,5,6,5,3 \
--feedforward-dim 1536,2048,3072,4096,3072,1536 \
--encoder-dim 512,768,1024,1536,1024,512 \
--encoder-unmasked-dim 192,192,256,320,256,192 \
--decoder-dim 768 --joiner-dim 768 \
--value-head-dim 18 \
--query-head-dim 48 \
--num-heads 4,4,4,8,4,4 \
\
--fp16 1 \
--use-whisper-features 1 \
--use-external-data 1
ls -lh *.onnx
ls -lh *.weights
mv encoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx encoder.fp16.onnx
mv encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx encoder.int8.onnx
mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.onnx decoder.onnx
mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx decoder.fp16.onnx
mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx joiner.int8.onnx
mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.fp16.onnx joiner.fp16.onnx
name=(
sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30
sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30
)
for n in ${name[@]}; do
url=https://huggingface.co/csukuangfj/$n
GIT_LFS_SKIP_SMUDGE=1 git clone $url
dst=$(basename $url)
if [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30 ]]; then
cp -v encoder.fp16.onnx $dst
cp -v decoder.fp16.onnx $dst
cp -v joiner.fp16.onnx $dst
elif [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30 ]]; then
cp -v encoder.int8.onnx $dst
cp -v decoder.onnx $dst
cp -v joiner.int8.onnx $dst
fi
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
cp -v $repo/README.md $dst
mkdir -p $dst/test_wavs
cp -v ./test_wavs/*.wav $dst/test_wavs
cd $dst
git lfs track "*.onnx" "*.wav" "bpe.model"
ls -lh
git status
git add .
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
rm -rf .git
rm -fv .gitattributes
cd ..
ls -lh $dst
tar cjfv $dst.tar.bz2 $dst
ls -lh *.tar.bz2
mv -v $dst.tar.bz2 ../../../
done
rm -fv *.onnx *.weights
}
# run_yuekai_large
# run_yuekai_xl
# run_2023_9_2
run_2023_11_05_streaming
# run_2023_12_12_streaming


@ -17,7 +17,7 @@ concurrency:
jobs:
generate_build_matrix:
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'aishell')
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
@ -31,8 +31,8 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
echo "::set-output name=matrix::${MATRIX}"
aishell:
needs: generate_build_matrix


@ -30,8 +30,8 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
echo "::set-output name=matrix::${MATRIX}"
audioset:
@ -83,7 +83,7 @@ jobs:
ls -lh ./model-onnx/*
- name: Upload model to huggingface
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
@ -116,7 +116,7 @@ jobs:
rm -rf huggingface
- name: Prepare for release
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
shell: bash
run: |
d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
@ -125,7 +125,7 @@ jobs:
ls -lh
- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true


@ -31,8 +31,8 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
echo "::set-output name=matrix::${MATRIX}"
baker_zh:
@ -84,43 +84,43 @@ jobs:
ls -lh
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-2
path: ./model-steps-2.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-3
path: ./model-steps-3.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-4
path: ./model-steps-4.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-5
path: ./model-steps-5.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-6
path: ./model-steps-6.onnx
- name: Upload models to huggingface
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@ -141,7 +141,7 @@ jobs:
popd
- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true


@ -34,7 +34,7 @@ concurrency:
jobs:
build-doc:
if: github.event.label.name == 'doc' || github.event_name == 'push'
# if: github.event.label.name == 'doc' || github.event_name == 'push'
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@ -43,7 +43,7 @@ jobs:
python-version: ["3.8"]
steps:
# refer to https://github.com/actions/checkout
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
fetch-depth: 0


@ -29,8 +29,9 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
# MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
echo "::set-output name=matrix::${MATRIX}"
librispeech:
needs: generate_build_matrix


@ -30,8 +30,8 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --min-torch-version "2.3")
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
echo "::set-output name=matrix::${MATRIX}"
ljspeech:
@ -83,13 +83,13 @@ jobs:
ls -lh
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav
- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
@ -100,37 +100,37 @@ jobs:
tag: tts-models
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-2
path: ./model-steps-2.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-3
path: ./model-steps-3.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-4
path: ./model-steps-4.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-5
path: ./model-steps-5.onnx
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
with:
name: step-6
path: ./model-steps-6.onnx
- name: Upload models to huggingface
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@ -155,7 +155,7 @@ jobs:
popd
- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true


@ -1,4 +1,4 @@
name: run-multi-zh-hans
name: multi-zh-hans
on:
push:
@ -8,65 +8,72 @@ on:
workflow_dispatch:
concurrency:
group: run-multi-zh-hans-${{ github.ref }}
group: multi-zh-hans-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: write
jobs:
generate_build_matrix:
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
# see https://github.com/pytorch/pytorch/pull/50633
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generating build matrix
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11")
echo "::set-output name=matrix::${MATRIX}"
multi-zh-hans:
runs-on: ${{ matrix.os }}
needs: generate_build_matrix
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [3.8]
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
cache-dependency-path: '**/requirements-ci.txt'
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
- name: Cache kaldifeat
id: my-cache
uses: actions/cache@v2
with:
path: |
~/tmp/kaldifeat
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
- name: Install kaldifeat
if: steps.my-cache.outputs.cache-hit != 'true'
- name: Free space
shell: bash
run: |
.github/scripts/install-kaldifeat.sh
df -h
rm -rf /opt/hostedtoolcache
df -h
echo "pwd: $PWD"
echo "github.workspace ${{ github.workspace }}"
- name: export-model
- name: Test with multi_zh-hans
uses: addnab/docker-run-action@v3
with:
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
options: |
--volume ${{ github.workspace }}/:/icefall
shell: bash
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
sudo apt-get -qq install git-lfs tree
export PYTHONPATH=$PWD:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
export PYTHONPATH=/icefall:$PYTHONPATH
export HF_TOKEN=${{ secrets.HF_TOKEN }}
cd /icefall
git config --global --add safe.directory /icefall
.github/scripts/multi-zh-hans.sh
ls -lh
.github/scripts/multi_zh-hans/ASR/run.sh
- name: Show models
shell: bash
run: |
ls -lh *.tar.bz2
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
uses: svenstaro/upload-release-action@v2


@ -30,8 +30,8 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
echo "::set-output name=matrix::${MATRIX}"
test:
needs: generate_build_matrix


@ -30,8 +30,9 @@ jobs:
id: set-matrix
run: |
# outputting for debugging purposes
python ./.github/scripts/docker/generate_build_matrix.py
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
# MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.5.0")
echo "::set-output name=matrix::${MATRIX}"
yesno:
needs: generate_build_matrix


@ -383,3 +383,7 @@ Please see: [![Open In Colab](https://colab.research.google.com/assets/colab-bad
[vctk]: egs/vctk/TTS
[ljspeech]: egs/ljspeech/TTS
[libritts_tts]: egs/libritts/TTS
## Acknowledgements
Some contributors to this project were supported by Xiaomi Corporation. Others were supported by National Science Foundation CCRI award 2120435. This is not an exhaustive list of sources of support.


@ -79,7 +79,13 @@ from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -638,7 +644,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -912,7 +918,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,
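Throughout this merge, with torch.cuda.amp.autocast(enabled=...) is replaced by with torch_autocast(enabled=...) imported from icefall.utils. The helper's implementation is not shown in this diff; below is a minimal sketch of what such a wrapper might look like, assuming its only job is to avoid the torch.cuda.amp.autocast deprecation on newer PyTorch. It is an assumption, not a copy of icefall/utils.py.

# Sketch of a torch_autocast helper: newer PyTorch deprecates
# torch.cuda.amp.autocast in favour of torch.amp.autocast("cuda", ...).
from contextlib import contextmanager

import torch

_HAS_DEVICE_AUTOCAST = hasattr(torch, "amp") and hasattr(torch.amp, "autocast")

@contextmanager
def torch_autocast(device_type: str = "cuda", **kwargs):
    # used as: with torch_autocast(enabled=params.use_fp16): ...
    if _HAS_DEVICE_AUTOCAST:
        with torch.amp.autocast(device_type=device_type, **kwargs):
            yield
    else:
        # fallback for old releases that only provide the CUDA-specific API
        with torch.cuda.amp.autocast(**kwargs):
            yield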


@ -72,7 +72,13 @@ from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -688,7 +694,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -989,7 +995,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,


@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -184,7 +184,7 @@ class Transducer(nn.Module):
lm = simple_lm_proj(decoder_out)
am = simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -219,7 +219,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,


@ -94,7 +94,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -797,7 +803,7 @@ def train_one_epoch(
aishell = is_aishell(batch["supervisions"]["cut"][0])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1202,7 +1208,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,


@ -94,6 +94,7 @@ from icefall.utils import (
filter_uneven_sized_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -809,7 +810,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1206,7 +1207,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,


@ -87,6 +87,7 @@ from icefall.utils import (
setup_logger,
str2bool,
tokenize_by_CJK_char,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -802,7 +803,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1202,7 +1203,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,


@ -81,7 +81,13 @@ from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -812,7 +818,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1202,7 +1208,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,


@ -92,7 +92,7 @@ class AishellAsrDataModule:
group.add_argument(
"--num-buckets",
type=int,
default=30,
default=15,
help="The number of buckets for the DynamicBucketingSampler"
"(you might want to increase it for larger datasets).",
)
@ -275,8 +275,7 @@ class AishellAsrDataModule:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:


@ -81,6 +81,7 @@ from icefall.utils import (
filter_uneven_sized_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -514,7 +515,7 @@ def compute_validation_loss(
tot_loss = MetricsTracker()
for batch_idx, batch in enumerate(valid_dl):
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
tokenizer=tokenizer,
@ -608,7 +609,7 @@ def train_one_epoch(
)
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
tokenizer=tokenizer,


@ -95,6 +95,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -910,7 +911,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1302,7 +1303,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,
@ -1343,8 +1344,7 @@ def main():
run(rank=0, world_size=1, args=args)
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
if __name__ == "__main__":
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
main()


@ -92,6 +92,7 @@ from icefall.utils import (
setup_logger,
str2bool,
tokenize_by_CJK_char,
torch_autocast,
)
@ -495,7 +496,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -895,7 +896,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,
@ -935,8 +936,7 @@ def main():
run(rank=0, world_size=1, args=args)
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
if __name__ == "__main__":
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
main()

View File

@ -104,7 +104,7 @@ class AiShell2AsrDataModule:
group.add_argument(
"--num-buckets",
type=int,
default=30,
default=15,
help="The number of buckets for the DynamicBucketingSampler"
"(you might want to increase it for larger datasets).",
)
@ -296,8 +296,7 @@ class AiShell2AsrDataModule:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:

View File

@ -90,7 +90,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -734,7 +740,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1062,7 +1068,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -83,7 +83,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -727,7 +733,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
# print(batch["supervisions"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1034,7 +1040,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -79,7 +79,13 @@ from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -638,7 +644,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -912,7 +918,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -73,7 +73,13 @@ from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -782,7 +788,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1127,7 +1133,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -71,7 +71,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -773,7 +779,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1134,7 +1140,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -76,7 +76,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -1067,7 +1073,7 @@ def train_one_epoch(
batch_size = batch["inputs"].shape[0]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,

View File

@ -76,7 +76,13 @@ from icefall.checkpoint import (
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -1058,7 +1064,7 @@ def train_one_epoch(
batch_size = batch["inputs"].shape[0]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,

View File

@ -74,6 +74,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -799,7 +800,7 @@ def train_one_epoch(
num_samples += batch_size
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1148,7 +1149,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -73,6 +73,8 @@ def compute_fbank_baker_zh(num_jobs: int):
f_min=0,
f_max=8000,
)
if not torch.cuda.is_available():
config.device = "cpu"
prefix = "baker_zh"
suffix = "jsonl.gz"

View File

@ -88,6 +88,7 @@ from icefall.utils import (
filter_uneven_sized_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -825,7 +826,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1220,7 +1221,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -90,6 +90,7 @@ from icefall.utils import (
filter_uneven_sized_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -895,7 +896,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1293,7 +1294,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -81,7 +81,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -840,7 +846,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1237,7 +1243,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -97,6 +97,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -969,7 +970,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1365,7 +1366,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -97,6 +97,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -604,7 +605,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -784,7 +785,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -83,7 +83,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
LOG_EPS = math.log(1e-10)
@ -838,7 +844,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1245,7 +1251,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -101,7 +101,7 @@ class GigaSpeechAsrDataModule:
group.add_argument(
"--num-buckets",
type=int,
default=30,
default=15,
help="The number of buckets for the DynamicBucketingSampler"
"(you might want to increase it for larger datasets).",
)
@ -294,8 +294,7 @@ class GigaSpeechAsrDataModule:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=self.args.num_buckets * 5000,
drop_last=True,
)
else:

View File

@ -77,7 +77,13 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -675,7 +681,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -944,7 +950,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -105,7 +105,7 @@ class GigaSpeechAsrDataModule:
group.add_argument(
"--num-buckets",
type=int,
default=100,
default=15,
help="The number of buckets for the DynamicBucketingSampler"
"(you might want to increase it for larger datasets).",
)
@ -311,8 +311,7 @@ class GigaSpeechAsrDataModule:
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
buffer_size=self.args.num_buckets * 5000,
drop_last=self.args.drop_last,
)
else:
@ -369,7 +368,7 @@ class GigaSpeechAsrDataModule:
cuts_valid,
max_duration=self.args.max_duration,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
buffer_size=self.args.num_buckets * 5000,
shuffle=False,
)
logging.info("About to create dev dataloader")

View File

@ -97,6 +97,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -958,7 +959,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1317,7 +1318,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -1,477 +0,0 @@
# Copyright 2021 Piotr Żelasko
# Copyright 2024 Xiaomi Corporation (Author: Wei Kang)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import inspect
import logging
import re
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, Optional
import lhotse
import torch
from lhotse import CutSet, Fbank, FbankConfig, load_manifest, load_manifest_lazy
from lhotse.dataset import (
CutConcatenate,
CutMix,
DynamicBucketingSampler,
K2SpeechRecognitionDataset,
PrecomputedFeatures,
SimpleCutSampler,
SpecAugment,
)
from lhotse.dataset.input_strategies import AudioSamples, OnTheFlyFeatures
from lhotse.utils import fix_random_seed
from torch.utils.data import DataLoader
from icefall.utils import str2bool
class _SeedWorkers:
def __init__(self, seed: int):
self.seed = seed
def __call__(self, worker_id: int):
fix_random_seed(self.seed + worker_id)
class GigaSpeechAsrDataModule:
"""
DataModule for k2 ASR experiments.
It assumes there is always one train and valid dataloader,
but there can be multiple test dataloaders (e.g. LibriSpeech test-clean
and test-other).
It contains all the common data pipeline modules used in ASR
experiments, e.g.:
- dynamic batch size,
- bucketing samplers,
- cut concatenation,
- augmentation,
- on-the-fly feature extraction
This class should be derived for specific corpora used in ASR tasks.
"""
def __init__(self, args: argparse.Namespace):
self.args = args
@classmethod
def add_arguments(cls, parser: argparse.ArgumentParser):
group = parser.add_argument_group(
title="ASR data related options",
description="These options are used for the preparation of "
"PyTorch DataLoaders from Lhotse CutSet's -- they control the "
"effective batch sizes, sampling strategies, applied data "
"augmentations, etc.",
)
group.add_argument(
"--manifest-dir",
type=Path,
default=Path("data/fbank"),
help="Path to directory with train/valid/test cuts.",
)
group.add_argument(
"--max-duration",
type=int,
default=200.0,
help="Maximum pooled recordings duration (seconds) in a "
"single batch. You can reduce it if it causes CUDA OOM.",
)
group.add_argument(
"--bucketing-sampler",
type=str2bool,
default=True,
help="When enabled, the batches will come from buckets of "
"similar duration (saves padding frames).",
)
group.add_argument(
"--num-buckets",
type=int,
default=30,
help="The number of buckets for the DynamicBucketingSampler"
"(you might want to increase it for larger datasets).",
)
group.add_argument(
"--concatenate-cuts",
type=str2bool,
default=False,
help="When enabled, utterances (cuts) will be concatenated "
"to minimize the amount of padding.",
)
group.add_argument(
"--duration-factor",
type=float,
default=1.0,
help="Determines the maximum duration of a concatenated cut "
"relative to the duration of the longest cut in a batch.",
)
group.add_argument(
"--gap",
type=float,
default=1.0,
help="The amount of padding (in seconds) inserted between "
"concatenated cuts. This padding is filled with noise when "
"noise augmentation is used.",
)
group.add_argument(
"--on-the-fly-feats",
type=str2bool,
default=False,
help="When enabled, use on-the-fly cut mixing and feature "
"extraction. Will drop existing precomputed feature manifests "
"if available.",
)
group.add_argument(
"--shuffle",
type=str2bool,
default=True,
help="When enabled (=default), the examples will be "
"shuffled for each epoch.",
)
group.add_argument(
"--drop-last",
type=str2bool,
default=True,
help="Whether to drop last batch. Used by sampler.",
)
group.add_argument(
"--return-cuts",
type=str2bool,
default=True,
help="When enabled, each batch will have the "
"field: batch['supervisions']['cut'] with the cuts that "
"were used to construct it.",
)
group.add_argument(
"--num-workers",
type=int,
default=2,
help="The number of training dataloader workers that "
"collect the batches.",
)
group.add_argument(
"--enable-spec-aug",
type=str2bool,
default=True,
help="When enabled, use SpecAugment for training dataset.",
)
group.add_argument(
"--spec-aug-time-warp-factor",
type=int,
default=80,
help="Used only when --enable-spec-aug is True. "
"It specifies the factor for time warping in SpecAugment. "
"Larger values mean more warping. "
"A value less than 1 means to disable time warp.",
)
group.add_argument(
"--enable-musan",
type=str2bool,
default=True,
help="When enabled, select noise from MUSAN and mix it"
"with training dataset. ",
)
group.add_argument(
"--input-strategy",
type=str,
default="PrecomputedFeatures",
help="AudioSamples or PrecomputedFeatures",
)
# GigaSpeech specific arguments
group.add_argument(
"--subset",
type=str,
default="XL",
help="Select the GigaSpeech subset (XS|S|M|L|XL)",
)
group.add_argument(
"--small-dev",
type=str2bool,
default=False,
help="Should we use only 1000 utterances for dev (speeds up training)",
)
def train_dataloaders(
self,
cuts_train: CutSet,
sampler_state_dict: Optional[Dict[str, Any]] = None,
) -> DataLoader:
"""
Args:
cuts_train:
CutSet for training.
sampler_state_dict:
The state dict for the training sampler.
"""
transforms = []
if self.args.enable_musan:
logging.info("Enable MUSAN")
logging.info("About to get Musan cuts")
cuts_musan = load_manifest(self.args.manifest_dir / "musan_cuts.jsonl.gz")
transforms.append(
CutMix(cuts=cuts_musan, p=0.5, snr=(10, 20), preserve_id=True)
)
else:
logging.info("Disable MUSAN")
if self.args.concatenate_cuts:
logging.info(
f"Using cut concatenation with duration factor "
f"{self.args.duration_factor} and gap {self.args.gap}."
)
# Cut concatenation should be the first transform in the list,
# so that if we e.g. mix noise in, it will fill the gaps between
# different utterances.
transforms = [
CutConcatenate(
duration_factor=self.args.duration_factor, gap=self.args.gap
)
] + transforms
input_transforms = []
if self.args.enable_spec_aug:
logging.info("Enable SpecAugment")
logging.info(f"Time warp factor: {self.args.spec_aug_time_warp_factor}")
# Set the value of num_frame_masks according to Lhotse's version.
# In different Lhotse's versions, the default of num_frame_masks is
# different.
num_frame_masks = 10
num_frame_masks_parameter = inspect.signature(
SpecAugment.__init__
).parameters["num_frame_masks"]
if num_frame_masks_parameter.default == 1:
num_frame_masks = 2
logging.info(f"Num frame mask: {num_frame_masks}")
input_transforms.append(
SpecAugment(
time_warp_factor=self.args.spec_aug_time_warp_factor,
num_frame_masks=num_frame_masks,
features_mask_size=27,
num_feature_masks=2,
frames_mask_size=100,
)
)
else:
logging.info("Disable SpecAugment")
logging.info("About to create train dataset")
train = K2SpeechRecognitionDataset(
input_strategy=eval(self.args.input_strategy)(),
cut_transforms=transforms,
input_transforms=input_transforms,
return_cuts=self.args.return_cuts,
)
if self.args.on_the_fly_feats:
# NOTE: the PerturbSpeed transform should be added only if we
# remove it from data prep stage.
# Add on-the-fly speed perturbation; since originally it would
# have increased epoch size by 3, we will apply prob 2/3 and use
# 3x more epochs.
# Speed perturbation probably should come first before
# concatenation, but in principle the transforms order doesn't have
# to be strict (e.g. could be randomized)
# transforms = [PerturbSpeed(factors=[0.9, 1.1], p=2/3)] + transforms # noqa
# Drop feats to be on the safe side.
train = K2SpeechRecognitionDataset(
cut_transforms=transforms,
input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))),
input_transforms=input_transforms,
return_cuts=self.args.return_cuts,
)
if self.args.bucketing_sampler:
logging.info("Using DynamicBucketingSampler.")
train_sampler = DynamicBucketingSampler(
cuts_train,
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
num_buckets=self.args.num_buckets,
drop_last=self.args.drop_last,
buffer_size=self.args.num_buckets * 2000,
shuffle_buffer_size=self.args.num_buckets * 5000,
)
else:
logging.info("Using SimpleCutSampler.")
train_sampler = SimpleCutSampler(
cuts_train,
max_duration=self.args.max_duration,
shuffle=self.args.shuffle,
)
logging.info("About to create train dataloader")
if sampler_state_dict is not None:
logging.info("Loading sampler state dict")
train_sampler.load_state_dict(sampler_state_dict)
# 'seed' is derived from the current random state, which will have
# previously been set in the main process.
seed = torch.randint(0, 100000, ()).item()
worker_init_fn = _SeedWorkers(seed)
train_dl = DataLoader(
train,
sampler=train_sampler,
batch_size=None,
num_workers=self.args.num_workers,
persistent_workers=False,
worker_init_fn=worker_init_fn,
)
return train_dl
def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader:
transforms = []
if self.args.concatenate_cuts:
transforms = [
CutConcatenate(
duration_factor=self.args.duration_factor, gap=self.args.gap
)
] + transforms
logging.info("About to create dev dataset")
if self.args.on_the_fly_feats:
validate = K2SpeechRecognitionDataset(
cut_transforms=transforms,
input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))),
return_cuts=self.args.return_cuts,
)
else:
validate = K2SpeechRecognitionDataset(
cut_transforms=transforms,
return_cuts=self.args.return_cuts,
)
valid_sampler = DynamicBucketingSampler(
cuts_valid,
max_duration=self.args.max_duration,
num_buckets=self.args.num_buckets,
buffer_size=self.args.num_buckets * 2000,
shuffle=False,
)
logging.info("About to create dev dataloader")
valid_dl = DataLoader(
validate,
sampler=valid_sampler,
batch_size=None,
num_workers=2,
persistent_workers=False,
)
return valid_dl
def test_dataloaders(self, cuts: CutSet) -> DataLoader:
logging.debug("About to create test dataset")
test = K2SpeechRecognitionDataset(
input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80)))
if self.args.on_the_fly_feats
else eval(self.args.input_strategy)(),
return_cuts=self.args.return_cuts,
)
sampler = DynamicBucketingSampler(
cuts,
max_duration=self.args.max_duration,
shuffle=False,
)
logging.debug("About to create test dataloader")
test_dl = DataLoader(
test,
batch_size=None,
sampler=sampler,
num_workers=self.args.num_workers,
)
return test_dl
@lru_cache()
def train_cuts(self) -> CutSet:
logging.info(f"About to get train {self.args.subset} cuts")
if self.args.subset == "XL":
filenames = glob.glob(
f"{self.args.manifest_dir}/XL_split/gigaspeech_cuts_XL.*.jsonl.gz"
)
pattern = re.compile(r"gigaspeech_cuts_XL.([0-9]+).jsonl.gz")
idx_filenames = ((int(pattern.search(f).group(1)), f) for f in filenames)
idx_filenames = sorted(idx_filenames, key=lambda x: x[0])
sorted_filenames = [f[1] for f in idx_filenames]
logging.info(
f"Loading GigaSpeech {len(sorted_filenames)} splits in lazy mode"
)
cuts_train = lhotse.combine(
lhotse.load_manifest_lazy(p) for p in sorted_filenames
)
else:
path = (
self.args.manifest_dir / f"gigaspeech_cuts_{self.args.subset}.jsonl.gz"
)
cuts_train = CutSet.from_jsonl_lazy(path)
return cuts_train
@lru_cache()
def dev_cuts(self) -> CutSet:
logging.info("About to get dev cuts")
cuts_valid = load_manifest_lazy(
self.args.manifest_dir / "gigaspeech_cuts_DEV.jsonl.gz"
)
if self.args.small_dev:
return cuts_valid.subset(first=1000)
else:
return cuts_valid
@lru_cache()
def test_cuts(self) -> CutSet:
logging.info("About to get test cuts")
return load_manifest_lazy(
self.args.manifest_dir / "gigaspeech_cuts_TEST.jsonl.gz"
)
@lru_cache()
def fsc_train_cuts(self) -> CutSet:
logging.info("About to get fluent speech commands train cuts")
return load_manifest_lazy(
self.args.manifest_dir / "fluent_speech_commands_cuts_train.jsonl.gz"
)
@lru_cache()
def fsc_valid_cuts(self) -> CutSet:
logging.info("About to get fluent speech commands valid cuts")
return load_manifest_lazy(
self.args.manifest_dir / "fluent_speech_commands_cuts_valid.jsonl.gz"
)
@lru_cache()
def fsc_test_small_cuts(self) -> CutSet:
logging.info("About to get fluent speech commands small test cuts")
return load_manifest_lazy(
self.args.manifest_dir / "fluent_speech_commands_cuts_small.jsonl.gz"
)
@lru_cache()
def fsc_test_large_cuts(self) -> CutSet:
logging.info("About to get fluent speech commands large test cuts")
return load_manifest_lazy(
self.args.manifest_dir / "fluent_speech_commands_cuts_large.jsonl.gz"
)

View File

@ -0,0 +1 @@
../../ASR/zipformer/asr_datamodule.py

View File

@ -97,6 +97,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -961,7 +962,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1320,7 +1321,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -77,7 +77,13 @@ from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.err import raise_grad_scale_is_too_small_error
from icefall.hooks import register_inf_check_hooks
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -805,7 +811,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1196,7 +1202,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -92,6 +92,7 @@ from icefall.utils import (
get_parameter_groups_with_lrs,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -942,7 +943,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1333,7 +1334,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -667,7 +667,9 @@ def main():
H = None
bpe_model = None
HLG = k2.Fsa.from_dict(
torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
torch.load(
f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False
)
)
assert HLG.requires_grad is False
@ -707,7 +709,9 @@ def main():
torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
else:
logging.info("Loading pre-compiled G_4_gram.pt")
d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
d = torch.load(
params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False
)
G = k2.Fsa.from_dict(d)
if params.method in [
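The decoding scripts in this merge start passing weights_only=False to torch.load. Recent PyTorch releases (2.6 and later) flipped the default of weights_only to True, which rejects the pickled dictionaries that k2.Fsa.from_dict and full icefall checkpoints are stored as; passing the flag explicitly restores the old behaviour. A hedged example with hypothetical paths:

import k2
import torch

# Explicit weights_only=False keeps torch.load compatible with torch >= 2.6
# when the file holds arbitrary pickled objects rather than bare tensors.
HLG = k2.Fsa.from_dict(
    torch.load("data/lang_char/HLG.pt", map_location="cpu", weights_only=False)
)
checkpoint = torch.load("exp/epoch-30.pt", map_location="cpu", weights_only=False)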

View File

@ -271,7 +271,7 @@ def main():
use_feat_batchnorm=params.use_feat_batchnorm,
)
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()
@ -351,7 +351,9 @@ def main():
"attention-decoder",
]:
logging.info(f"Loading HLG from {params.HLG}")
HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
HLG = k2.Fsa.from_dict(
torch.load(params.HLG, map_location="cpu", weights_only=False)
)
HLG = HLG.to(device)
if not hasattr(HLG, "lm_scores"):
# For whole-lattice-rescoring and attention-decoder
@ -362,7 +364,9 @@ def main():
"attention-decoder",
]:
logging.info(f"Loading G from {params.G}")
G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
G = k2.Fsa.from_dict(
torch.load(params.G, map_location="cpu", weights_only=False)
)
# Add epsilon self-loops to G as we will compose
# it with the whole lattice later
G = G.to(device)

View File

@ -774,7 +774,9 @@ def main():
H = None
bpe_model = None
HLG = k2.Fsa.from_dict(
torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
torch.load(
f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False
)
)
assert HLG.requires_grad is False
@ -814,7 +816,9 @@ def main():
torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
else:
logging.info("Loading pre-compiled G_4_gram.pt")
d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
d = torch.load(
params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False
)
G = k2.Fsa.from_dict(d)
if params.method in [

View File

@ -65,7 +65,6 @@ from lhotse.dataset.sampling.base import CutSampler
from lhotse.utils import fix_random_seed
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -84,9 +83,11 @@ from icefall.lexicon import Lexicon
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
encode_supervisions,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -420,7 +421,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -629,7 +630,7 @@ def train_one_epoch(
scheduler: LRSchedulerType,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -676,7 +677,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -965,7 +966,7 @@ def run(rank, world_size, args):
params=params,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1036,7 +1037,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,
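Alongside torch_autocast, these training-script hunks drop the direct GradScaler import and obtain the scaler from a create_grad_scaler helper in icefall.utils, switching the type annotations to the string literal "GradScaler" so the import is no longer needed at annotation time. A hedged sketch of what such a factory might do, assuming it prefers the device-agnostic torch.amp.GradScaler available in newer PyTorch versions:

import torch


def create_grad_scaler(device: str = "cuda", **kwargs):
    # Sketch only; the real helper lives in icefall/utils.py.
    if hasattr(torch, "amp") and hasattr(torch.amp, "GradScaler"):
        # torch >= 2.3 exposes torch.amp.GradScaler(device, ...)
        return torch.amp.GradScaler(device, **kwargs)
    return torch.cuda.amp.GradScaler(**kwargs)


scaler = create_grad_scaler(enabled=True)  # e.g. enabled=params.use_fp16 in the training loops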

View File

@ -868,7 +868,9 @@ def main():
H = None
bpe_model = None
HLG = k2.Fsa.from_dict(
torch.load(f"{params.lang_dir}/HLG.pt", map_location=device)
torch.load(
f"{params.lang_dir}/HLG.pt", map_location=device, weights_only=False
)
)
assert HLG.requires_grad is False
@ -907,7 +909,9 @@ def main():
torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
else:
logging.info("Loading pre-compiled G_4_gram.pt")
d = torch.load(params.lm_dir / "G_4_gram.pt", map_location=device)
d = torch.load(
params.lm_dir / "G_4_gram.pt", map_location=device, weights_only=False
)
G = k2.Fsa.from_dict(d)
if params.decoding_method == "whole-lattice-rescoring":

View File

@ -334,7 +334,9 @@ def main():
"whole-lattice-rescoring",
]:
logging.info(f"Loading HLG from {params.HLG}")
HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
HLG = k2.Fsa.from_dict(
torch.load(params.HLG, map_location="cpu", weights_only=False)
)
HLG = HLG.to(device)
if not hasattr(HLG, "lm_scores"):
# For whole-lattice-rescoring and attention-decoder
@ -345,7 +347,9 @@ def main():
"whole-lattice-rescoring",
]:
logging.info(f"Loading G from {params.G}")
G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
G = k2.Fsa.from_dict(
torch.load(params.G, map_location="cpu", weights_only=False)
)
G = G.to(device)
if params.method == "whole-lattice-rescoring":
# Add epsilon self-loops to G as we will compose

View File

@ -290,7 +290,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()
@ -386,7 +386,9 @@ def main():
"whole-lattice-rescoring",
]:
logging.info(f"Loading HLG from {params.HLG}")
HLG = k2.Fsa.from_dict(torch.load(params.HLG, map_location="cpu"))
HLG = k2.Fsa.from_dict(
torch.load(params.HLG, map_location="cpu", weights_only=False)
)
HLG = HLG.to(device)
if not hasattr(HLG, "lm_scores"):
# For whole-lattice-rescoring and attention-decoder
@ -397,7 +399,9 @@ def main():
"whole-lattice-rescoring",
]:
logging.info(f"Loading G from {params.G}")
G = k2.Fsa.from_dict(torch.load(params.G, map_location="cpu"))
G = k2.Fsa.from_dict(
torch.load(params.G, map_location="cpu", weights_only=False)
)
G = G.to(device)
if params.method == "whole-lattice-rescoring":
# Add epsilon self-loops to G as we will compose

View File

@ -76,7 +76,6 @@ from lhotse.utils import fix_random_seed
from model import CTCModel
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -95,9 +94,11 @@ from icefall.lexicon import Lexicon
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
encode_supervisions,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -493,7 +494,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -694,7 +695,7 @@ def train_one_epoch(
graph_compiler: Union[BpeCtcTrainingGraphCompiler, CtcTrainingGraphCompiler],
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -743,7 +744,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1004,7 +1005,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1073,7 +1074,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -574,7 +574,9 @@ def main():
H = None
bpe_model = None
HLG = k2.Fsa.from_dict(
torch.load(f"{params.lang_dir}/HLG.pt", map_location="cpu")
torch.load(
f"{params.lang_dir}/HLG.pt", map_location="cpu", weights_only=False
)
)
HLG = HLG.to(device)
assert HLG.requires_grad is False
@ -609,7 +611,9 @@ def main():
torch.save(G.as_dict(), params.lm_dir / "G_4_gram.pt")
else:
logging.info("Loading pre-compiled G_4_gram.pt")
d = torch.load(params.lm_dir / "G_4_gram.pt", map_location="cpu")
d = torch.load(
params.lm_dir / "G_4_gram.pt", map_location="cpu", weights_only=False
)
G = k2.Fsa.from_dict(d).to(device)
if params.method in ["whole-lattice-rescoring", "attention-decoder"]:

View File

@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -93,7 +92,14 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -560,7 +566,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -727,7 +733,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -772,7 +778,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1002,7 +1008,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1071,7 +1077,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -93,7 +92,14 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -560,7 +566,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -727,7 +733,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -772,7 +778,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1001,7 +1007,7 @@ def run(rank, world_size, args):
params=params,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1072,7 +1078,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -72,11 +72,11 @@ def compile_HLG(lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
max_token_id = max(lexicon.tokens)
logging.info(f"Building ctc_topo. max_token_id: {max_token_id}")
H = k2.ctc_topo(max_token_id)
L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
if Path(f"data/lm/{lm}.pt").is_file():
logging.info(f"Loading pre-compiled {lm}")
d = torch.load(f"data/lm/{lm}.pt")
d = torch.load(f"data/lm/{lm}.pt", weights_only=False)
G = k2.Fsa.from_dict(d)
else:
logging.info(f"Loading {lm}.fst.txt")

View File

@ -66,11 +66,11 @@ def compile_LG(lang_dir: str, lm: str = "G_3_gram") -> k2.Fsa:
An FSA representing LG.
"""
lexicon = Lexicon(lang_dir)
L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt"))
L = k2.Fsa.from_dict(torch.load(f"{lang_dir}/L_disambig.pt", weights_only=False))
if Path(f"data/lm/{lm}.pt").is_file():
logging.info(f"Loading pre-compiled {lm}")
d = torch.load(f"data/lm/{lm}.pt")
d = torch.load(f"data/lm/{lm}.pt", weights_only=False)
G = k2.Fsa.from_dict(d)
else:
logging.info(f"Loading {lm}.fst.txt")

View File

@ -750,7 +750,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -156,7 +156,7 @@ class Transducer(nn.Module):
lm = self.simple_lm_proj(decoder_out)
am = self.simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -192,7 +192,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = self.joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,

View File

@ -238,7 +238,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -66,7 +66,6 @@ from lstm import RNN
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -82,9 +81,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -521,7 +522,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -717,7 +718,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -763,7 +764,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1023,7 +1024,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1092,7 +1093,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -935,7 +935,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -195,7 +195,7 @@ class Transducer(nn.Module):
lm = simple_lm_proj(decoder_out)
am = simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -231,7 +231,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,

View File

@ -241,7 +241,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -74,7 +74,6 @@ from lstm import RNN
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -90,9 +89,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -560,7 +561,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -772,7 +773,7 @@ def train_one_epoch(
giga_train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
rng: random.Random,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -848,7 +849,7 @@ def train_one_epoch(
libri = is_libri(batch["supervisions"]["cut"][0])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1176,7 +1177,7 @@ def run(rank, world_size, args):
else:
logging.info("Skip scan_pessimistic_batches_for_oom")
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1247,7 +1248,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -815,7 +815,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -239,7 +239,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -66,7 +66,6 @@ from lstm import RNN
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -82,9 +81,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -551,7 +552,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -747,7 +748,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -793,7 +794,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1067,7 +1068,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1136,7 +1137,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -21,7 +21,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -141,7 +141,7 @@ class Transducer(nn.Module):
lm = self.simple_lm_proj(decoder_out)
am = self.simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -176,7 +176,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = self.joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,

View File

@ -10,9 +10,11 @@ from typing import Optional, Tuple
import torch
from scaling import ScaledLinear
from torch import Tensor, nn
from torch.cuda.amp import GradScaler, custom_bwd, custom_fwd
from torch.cuda.amp import custom_bwd, custom_fwd
from torch_scheduled_sampling import sample_combined
from icefall.utils import create_grad_scaler, torch_autocast
# The main exports of this file are the module KnowledgeBaseLookup and the
# function create_knowledge_base.
@ -330,14 +332,14 @@ def _test_knowledge_base_lookup_autocast():
optimizer = Eve(m.parameters(), lr=0.005, eps=1.0e-04)
m = m.to(device)
scaler = GradScaler(enabled=True)
scaler = create_grad_scaler(enabled=True)
start = timeit.default_timer()
for epoch in range(150):
for n, (x, y) in enumerate(train_pairs):
y_out = m(x)
with torch.cuda.amp.autocast(enabled=True):
with torch_autocast(enabled=True):
loss = ((y_out - y) ** 2).mean() * 100.0
if n % 10 == 0 and epoch % 10 == 0:
print(f"Epoch {epoch}, batch {n}, loss {loss.item()}")

View File

@ -66,7 +66,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -76,7 +75,14 @@ from icefall.checkpoint import save_checkpoint as save_checkpoint_impl
from icefall.checkpoint import save_checkpoint_with_global_batch_idx
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
create_grad_scaler,
AttributeDict,
MetricsTracker,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -453,7 +459,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -608,7 +614,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
rank: int = 0,
@ -650,7 +656,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -868,7 +874,7 @@ def run(rank, world_size, args):
params=params,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -937,7 +943,7 @@ def scan_pessimistic_batches_for_oom(
# warmup = 0.0 is so that the derivs for the pruned loss stay zero
# (i.e. are not remembered by the decaying-average in adam), because
# we want to avoid these params being subject to shrinkage in adam.
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -55,7 +55,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from noam import Noam
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -68,7 +67,14 @@ from icefall.checkpoint import (
)
from icefall.dist import cleanup_dist, setup_dist
from icefall.env import get_env_info
from icefall.utils import AttributeDict, MetricsTracker, setup_logger, str2bool
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
setup_logger,
str2bool,
torch_autocast,
)
def add_model_arguments(parser: argparse.ArgumentParser):
@ -496,7 +502,7 @@ def save_checkpoint(
model_avg: Optional[nn.Module] = None,
optimizer: Optional[torch.optim.Optimizer] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, and training stats to file.
@ -650,7 +656,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -693,7 +699,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -939,7 +945,7 @@ def run(rank, world_size, args):
params=params,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1004,7 +1010,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -741,7 +741,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -1347,7 +1347,10 @@ def modified_beam_search(
(
context_score,
new_context_state,
) = context_graph.forward_one_step(hyp.context_state, new_token)
_,
) = context_graph.forward_one_step(
hyp.context_state, new_token, strict_mode=False
)
new_log_prob = topk_log_probs[k] + context_score
@ -2853,7 +2856,10 @@ def modified_beam_search_LODR(
(
context_score,
new_context_state,
) = context_graph.forward_one_step(hyp.context_state, new_token)
_,
) = context_graph.forward_one_step(
hyp.context_state, new_token, strict_mode=False
)
ys.append(new_token)
state_cost = hyp.state_cost.forward_one_step(new_token)
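
Both beam-search hunks above make the same adaptation: ContextGraph.forward_one_step is now called with strict_mode=False and returns three values, of which this code keeps the first two. Because the rendered diff interleaves the old and new lines, the new call shape is repeated here for clarity; the variable names come from the surrounding loop, and the meaning of the discarded third value is not documented in this diff:

# Updated call pattern only; hyp, context_graph, new_token, topk_log_probs
# and k are defined by the enclosing modified_beam_search loop.
context_score, new_context_state, _ = context_graph.forward_one_step(
    hyp.context_state, new_token, strict_mode=False
)
new_log_prob = topk_log_probs[k] + context_score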

View File

@ -754,7 +754,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -157,7 +157,7 @@ class Transducer(nn.Module):
lm = self.simple_lm_proj(decoder_out)
am = self.simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -193,7 +193,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = self.joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,

View File

@ -265,7 +265,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -78,7 +78,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -91,9 +90,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -523,7 +524,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -716,7 +717,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
rank: int = 0,
@ -759,7 +760,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1000,7 +1001,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 0 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1067,7 +1068,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -921,7 +921,7 @@ def load_ngram_LM(
if pt_file.is_file():
logging.info(f"Loading pre-compiled {pt_file}")
d = torch.load(pt_file, map_location=device)
d = torch.load(pt_file, map_location=device, weights_only=False)
G = k2.Fsa.from_dict(d)
G = k2.add_epsilon_self_loops(G)
G = k2.arc_sort(G)
@ -1101,7 +1101,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
elif params.decoding_method in [

View File

@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -195,7 +195,7 @@ class Transducer(nn.Module):
lm = simple_lm_proj(decoder_out)
am = simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -231,7 +231,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,

View File

@ -274,7 +274,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -74,7 +74,6 @@ from librispeech import LibriSpeech
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -87,9 +86,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -546,7 +547,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -755,7 +756,7 @@ def train_one_epoch(
giga_train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
rng: random.Random,
scaler: GradScaler,
scaler: "GradScaler",
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
rank: int = 0,
@ -827,7 +828,7 @@ def train_one_epoch(
libri = is_libri(batch["supervisions"]["cut"][0])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1126,7 +1127,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 0 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1195,7 +1196,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -913,7 +913,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -96,9 +95,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -548,7 +549,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -744,7 +745,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -789,7 +790,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1047,7 +1048,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1116,7 +1117,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -972,7 +972,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -238,7 +238,7 @@ def main():
num_param = sum([p.numel() for p in model.parameters()])
logging.info(f"Number of model parameters: {num_param}")
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -68,7 +68,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -84,9 +83,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -571,7 +572,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -768,7 +769,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -814,7 +815,7 @@ def train_one_epoch(
batch_size = len(batch["supervisions"]["text"])
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1078,7 +1079,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1147,7 +1148,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -23,7 +23,7 @@ import torch.nn as nn
from encoder_interface import EncoderInterface
from scaling import ScaledLinear
from icefall.utils import add_sos
from icefall.utils import add_sos, torch_autocast
class Transducer(nn.Module):
@ -185,7 +185,7 @@ class Transducer(nn.Module):
lm = self.simple_lm_proj(decoder_out)
am = self.simple_am_proj(encoder_out)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
simple_loss, (px_grad, py_grad) = k2.rnnt_loss_smoothed(
lm=lm.float(),
am=am.float(),
@ -220,7 +220,7 @@ class Transducer(nn.Module):
# prior to do_rnnt_pruning (this is an optimization for speed).
logits = self.joiner(am_pruned, lm_pruned, project_input=False)
with torch.cuda.amp.autocast(enabled=False):
with torch_autocast(enabled=False):
pruned_loss = k2.rnnt_loss_pruned(
logits=logits.float(),
symbols=y_padded,

View File

@ -80,7 +80,6 @@ from lhotse.utils import fix_random_seed
from model import Transducer
from optim import Eden, Eve
from torch import Tensor
from torch.cuda.amp import GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
@ -96,9 +95,11 @@ from icefall.env import get_env_info
from icefall.utils import (
AttributeDict,
MetricsTracker,
create_grad_scaler,
display_and_save_batch,
setup_logger,
str2bool,
torch_autocast,
)
LRSchedulerType = Union[torch.optim.lr_scheduler._LRScheduler, optim.LRScheduler]
@ -519,7 +520,7 @@ def save_checkpoint(
optimizer: Optional[torch.optim.Optimizer] = None,
scheduler: Optional[LRSchedulerType] = None,
sampler: Optional[CutSampler] = None,
scaler: Optional[GradScaler] = None,
scaler: Optional["GradScaler"] = None,
rank: int = 0,
) -> None:
"""Save model, optimizer, scheduler and training stats to file.
@ -736,7 +737,7 @@ def train_one_epoch(
sp: spm.SentencePieceProcessor,
train_dl: torch.utils.data.DataLoader,
valid_dl: torch.utils.data.DataLoader,
scaler: GradScaler,
scaler: "GradScaler",
model_avg: Optional[nn.Module] = None,
tb_writer: Optional[SummaryWriter] = None,
world_size: int = 1,
@ -781,7 +782,7 @@ def train_one_epoch(
params.batch_idx_train += 1
batch_size = len(batch["supervisions"]["text"])
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, loss_info = compute_loss(
params=params,
model=model,
@ -1039,7 +1040,7 @@ def run(rank, world_size, args):
warmup=0.0 if params.start_epoch == 1 else 1.0,
)
scaler = GradScaler(enabled=params.use_fp16)
scaler = create_grad_scaler(enabled=params.use_fp16)
if checkpoints and "grad_scaler" in checkpoints:
logging.info("Loading grad scaler state dict")
scaler.load_state_dict(checkpoints["grad_scaler"])
@ -1108,7 +1109,7 @@ def scan_pessimistic_batches_for_oom(
for criterion, cuts in batches.items():
batch = train_dl.dataset[cuts]
try:
with torch.cuda.amp.autocast(enabled=params.use_fp16):
with torch_autocast(enabled=params.use_fp16):
loss, _ = compute_loss(
params=params,
model=model,

View File

@ -348,7 +348,9 @@ class CodebookIndexExtractor:
num_codebooks=self.params.num_codebooks,
codebook_size=256,
)
quantizer.load_state_dict(torch.load(self.quantizer_file_path))
quantizer.load_state_dict(
torch.load(self.quantizer_file_path, weights_only=False)
)
quantizer.to(self.params.device)
return quantizer

View File

@ -289,7 +289,7 @@ def main():
logging.info("About to create model")
model = get_transducer_model(params)
checkpoint = torch.load(args.checkpoint, map_location="cpu")
checkpoint = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
model.load_state_dict(checkpoint["model"], strict=False)
model.to(device)
model.eval()

View File

@ -910,7 +910,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

View File

@ -813,7 +813,7 @@ def main():
lg_filename = params.lang_dir / "LG.pt"
logging.info(f"Loading {lg_filename}")
decoding_graph = k2.Fsa.from_dict(
torch.load(lg_filename, map_location=device)
torch.load(lg_filename, map_location=device, weights_only=False)
)
decoding_graph.scores *= params.ngram_lm_scale
else:

Some files were not shown because too many files have changed in this diff.